aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiklos Szeredi <mszeredi@suse.cz>2008-11-09 09:23:57 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2008-11-09 14:17:33 -0500
commit6209344f5a3795d34b7f2c0061f49802283b6bdd (patch)
tree5c037ddbb8caac17b0c6101c9ab86387df106d41
parent058e3739f6b0753696db1952378de9e8d2a11735 (diff)
net: unix: fix inflight counting bug in garbage collector
Previously I assumed that the receive queues of candidates don't change during the GC. This is only half true, nothing can be received from the queues (see comment in unix_gc()), but buffers could be added through the other half of the socket pair, which may still have file descriptors referring to it. This can result in inc_inflight_move_tail() erronously increasing the "inflight" counter for a unix socket for which dec_inflight() wasn't previously called. This in turn can trigger the "BUG_ON(total_refs < inflight_refs)" in a later garbage collection run. Fix this by only manipulating the "inflight" counter for sockets which are candidates themselves. Duplicating the file references in unix_attach_fds() is also needed to prevent a socket becoming a candidate for GC while the skb that contains it is not yet queued. Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> CC: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/net/af_unix.h1
-rw-r--r--net/unix/af_unix.c31
-rw-r--r--net/unix/garbage.c49
3 files changed, 62 insertions, 19 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 7dd29b7e461d..c29ff1da8a18 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
54 atomic_long_t inflight; 54 atomic_long_t inflight;
55 spinlock_t lock; 55 spinlock_t lock;
56 unsigned int gc_candidate : 1; 56 unsigned int gc_candidate : 1;
57 unsigned int gc_maybe_cycle : 1;
57 wait_queue_head_t peer_wait; 58 wait_queue_head_t peer_wait;
58}; 59};
59#define unix_sk(__sk) ((struct unix_sock *)__sk) 60#define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4d3c6071b9a4..eb90f77bb0e2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
1302 sock_wfree(skb); 1302 sock_wfree(skb);
1303} 1303}
1304 1304
1305static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1305static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1306{ 1306{
1307 int i; 1307 int i;
1308
1309 /*
1310 * Need to duplicate file references for the sake of garbage
1311 * collection. Otherwise a socket in the fps might become a
1312 * candidate for GC while the skb is not yet queued.
1313 */
1314 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1315 if (!UNIXCB(skb).fp)
1316 return -ENOMEM;
1317
1308 for (i=scm->fp->count-1; i>=0; i--) 1318 for (i=scm->fp->count-1; i>=0; i--)
1309 unix_inflight(scm->fp->fp[i]); 1319 unix_inflight(scm->fp->fp[i]);
1310 UNIXCB(skb).fp = scm->fp;
1311 skb->destructor = unix_destruct_fds; 1320 skb->destructor = unix_destruct_fds;
1312 scm->fp = NULL; 1321 return 0;
1313} 1322}
1314 1323
1315/* 1324/*
@@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1368 goto out; 1377 goto out;
1369 1378
1370 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1379 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1371 if (siocb->scm->fp) 1380 if (siocb->scm->fp) {
1372 unix_attach_fds(siocb->scm, skb); 1381 err = unix_attach_fds(siocb->scm, skb);
1382 if (err)
1383 goto out_free;
1384 }
1373 unix_get_secdata(siocb->scm, skb); 1385 unix_get_secdata(siocb->scm, skb);
1374 1386
1375 skb_reset_transport_header(skb); 1387 skb_reset_transport_header(skb);
@@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1538 size = min_t(int, size, skb_tailroom(skb)); 1550 size = min_t(int, size, skb_tailroom(skb));
1539 1551
1540 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1552 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1541 if (siocb->scm->fp) 1553 if (siocb->scm->fp) {
1542 unix_attach_fds(siocb->scm, skb); 1554 err = unix_attach_fds(siocb->scm, skb);
1555 if (err) {
1556 kfree_skb(skb);
1557 goto out_err;
1558 }
1559 }
1543 1560
1544 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { 1561 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1545 kfree_skb(skb); 1562 kfree_skb(skb);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84f740b..6d4a9a8de5ef 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
186 */ 186 */
187 struct sock *sk = unix_get_socket(*fp++); 187 struct sock *sk = unix_get_socket(*fp++);
188 if (sk) { 188 if (sk) {
189 hit = true; 189 struct unix_sock *u = unix_sk(sk);
190 func(unix_sk(sk)); 190
191 /*
192 * Ignore non-candidates, they could
193 * have been added to the queues after
194 * starting the garbage collection
195 */
196 if (u->gc_candidate) {
197 hit = true;
198 func(u);
199 }
191 } 200 }
192 } 201 }
193 if (hit && hitlist != NULL) { 202 if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
249{ 258{
250 atomic_long_inc(&u->inflight); 259 atomic_long_inc(&u->inflight);
251 /* 260 /*
252 * If this is still a candidate, move it to the end of the 261 * If this still might be part of a cycle, move it to the end
253 * list, so that it's checked even if it was already passed 262 * of the list, so that it's checked even if it was already
254 * over 263 * passed over
255 */ 264 */
256 if (u->gc_candidate) 265 if (u->gc_maybe_cycle)
257 list_move_tail(&u->link, &gc_candidates); 266 list_move_tail(&u->link, &gc_candidates);
258} 267}
259 268
@@ -267,6 +276,7 @@ void unix_gc(void)
267 struct unix_sock *next; 276 struct unix_sock *next;
268 struct sk_buff_head hitlist; 277 struct sk_buff_head hitlist;
269 struct list_head cursor; 278 struct list_head cursor;
279 LIST_HEAD(not_cycle_list);
270 280
271 spin_lock(&unix_gc_lock); 281 spin_lock(&unix_gc_lock);
272 282
@@ -282,10 +292,14 @@ void unix_gc(void)
282 * 292 *
283 * Holding unix_gc_lock will protect these candidates from 293 * Holding unix_gc_lock will protect these candidates from
284 * being detached, and hence from gaining an external 294 * being detached, and hence from gaining an external
285 * reference. This also means, that since there are no 295 * reference. Since there are no possible receivers, all
286 * possible receivers, the receive queues of these sockets are 296 * buffers currently on the candidates' queues stay there
287 * static during the GC, even though the dequeue is done 297 * during the garbage collection.
288 * before the detach without atomicity guarantees. 298 *
299 * We also know that no new candidate can be added onto the
300 * receive queues. Other, non candidate sockets _can_ be
301 * added to queue, so we must make sure only to touch
302 * candidates.
289 */ 303 */
290 list_for_each_entry_safe(u, next, &gc_inflight_list, link) { 304 list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
291 long total_refs; 305 long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
299 if (total_refs == inflight_refs) { 313 if (total_refs == inflight_refs) {
300 list_move_tail(&u->link, &gc_candidates); 314 list_move_tail(&u->link, &gc_candidates);
301 u->gc_candidate = 1; 315 u->gc_candidate = 1;
316 u->gc_maybe_cycle = 1;
302 } 317 }
303 } 318 }
304 319
@@ -325,14 +340,24 @@ void unix_gc(void)
325 list_move(&cursor, &u->link); 340 list_move(&cursor, &u->link);
326 341
327 if (atomic_long_read(&u->inflight) > 0) { 342 if (atomic_long_read(&u->inflight) > 0) {
328 list_move_tail(&u->link, &gc_inflight_list); 343 list_move_tail(&u->link, &not_cycle_list);
329 u->gc_candidate = 0; 344 u->gc_maybe_cycle = 0;
330 scan_children(&u->sk, inc_inflight_move_tail, NULL); 345 scan_children(&u->sk, inc_inflight_move_tail, NULL);
331 } 346 }
332 } 347 }
333 list_del(&cursor); 348 list_del(&cursor);
334 349
335 /* 350 /*
351 * not_cycle_list contains those sockets which do not make up a
352 * cycle. Restore these to the inflight list.
353 */
354 while (!list_empty(&not_cycle_list)) {
355 u = list_entry(not_cycle_list.next, struct unix_sock, link);
356 u->gc_candidate = 0;
357 list_move_tail(&u->link, &gc_inflight_list);
358 }
359
360 /*
336 * Now gc_candidates contains only garbage. Restore original 361 * Now gc_candidates contains only garbage. Restore original
337 * inflight counters for these as well, and remove the skbuffs 362 * inflight counters for these as well, and remove the skbuffs
338 * which are creating the cycle(s). 363 * which are creating the cycle(s).