Diffstat (limited to 'net/unix')

 -rw-r--r--  net/unix/af_unix.c | 35
 -rw-r--r--  net/unix/garbage.c | 62

 2 files changed, 74 insertions, 23 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc504d308ec0..66d5ac4773ab 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection. Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
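This hunk is the core of the fix. unix_attach_fds() used to steal the sender's fd list outright (UNIXCB(skb).fp = scm->fp; scm->fp = NULL;), so between building the skb and queueing it, sockets referenced only by that list could be mistaken for garbage. Duplicating the list with scm_fp_dup() gives the skb its own set of file references, and the new int return lets the callers below unwind and propagate -ENOMEM. The userspace sketch that follows models the duplicate-then-attach pattern; the struct names and the fdlist_dup() helper are illustrative stand-ins, not the kernel API.

#include <stdlib.h>
#include <string.h>

/* Toy stand-in: "refcount" models struct file's usage count. */
struct file { int refcount; };

struct fdlist {
	int count;
	struct file **fp;
};

/*
 * Conceptual mirror of scm_fp_dup(): allocate a fresh list, copy the
 * pointers, and take one extra reference per file, so the message and
 * the sender each own a complete set.  Returns NULL on allocation
 * failure (which the kernel caller turns into -ENOMEM).
 */
static struct fdlist *fdlist_dup(const struct fdlist *src)
{
	struct fdlist *copy = malloc(sizeof(*copy));

	if (!copy)
		return NULL;
	copy->count = src->count;
	copy->fp = calloc(src->count, sizeof(*copy->fp));
	if (!copy->fp) {
		free(copy);
		return NULL;
	}
	memcpy(copy->fp, src->fp, src->count * sizeof(*copy->fp));
	for (int i = 0; i < copy->count; i++)
		copy->fp[i]->refcount++;	/* like taking a file ref */
	return copy;
}

Because each side now holds its own references, either copy can be released independently: the skb destructor drops the message's set, and the sender's set is dropped when its scm_cookie is destroyed.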
@@ -1334,6 +1343,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
+	wait_for_unix_gc();
 	err = scm_send(sock, msg, siocb->scm);
 	if (err < 0)
 		return err;
@@ -1368,8 +1378,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1481,6 +1494,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
+	wait_for_unix_gc();
 	err = scm_send(sock, msg, siocb->scm);
 	if (err < 0)
 		return err;
@@ -1538,8 +1552,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
@@ -2213,7 +2232,7 @@ static int unix_net_init(struct net *net)
 #endif
 	error = 0;
 out:
-	return 0;
+	return error;
 }
 
 static void unix_net_exit(struct net *net)
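The unix_net_init() hunk is a small independent fix caught in the same range: the function computed an error code but returned a hard-coded 0 after the out: label, so every failed setup path still reported success. A minimal illustration of the bug shape, with a hypothetical helper name:

#include <errno.h>

static int setup_something(void)	/* hypothetical helper */
{
	return 0;			/* pretend the setup failed */
}

static int example_init(void)
{
	int error = -ENOMEM;

	if (!setup_something())
		goto out;		/* error still holds -ENOMEM */
	error = 0;
out:
	return error;			/* was "return 0;", hiding the failure */
}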
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84f740b..abb3ab34cb1e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -80,6 +80,7 @@
 #include <linux/file.h>
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
+#include <linux/wait.h>
 
 #include <net/sock.h>
 #include <net/af_unix.h>
@@ -91,6 +92,7 @@
 static LIST_HEAD(gc_inflight_list);
 static LIST_HEAD(gc_candidates);
 static DEFINE_SPINLOCK(unix_gc_lock);
+static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
 
 unsigned int unix_tot_inflight;
 
@@ -186,8 +188,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 				 */
				struct sock *sk = unix_get_socket(*fp++);
				if (sk) {
-					hit = true;
-					func(unix_sk(sk));
+					struct unix_sock *u = unix_sk(sk);
+
+					/*
+					 * Ignore non-candidates, they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
 				}
 			}
 			if (hit && hitlist != NULL) {
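The comment added here is the subtle half of the race: once senders duplicate fd lists, sockets that are not GC candidates can still land on a candidate's receive queue while a collection runs, so scan_inflight() may only count and touch sockets already marked gc_candidate. A compact sketch of that guard, with illustrative types rather than the kernel's:

/* Toy model of an AF_UNIX socket as seen by the scanner. */
struct toy_sock {
	int gc_candidate;
};

/*
 * Walk the sockets referenced by one queued message and apply func
 * only to GC candidates.  Anything else was queued after collection
 * started and is externally reachable, so it must be skipped.
 * Returns 1 if any candidate was hit (like the "hit" flag above).
 */
static int scan_fds(struct toy_sock **fp, int count,
		    void (*func)(struct toy_sock *))
{
	int hit = 0;

	for (int i = 0; i < count; i++) {
		struct toy_sock *u = fp[i];

		if (!u)			/* fd is not an AF_UNIX socket */
			continue;
		if (!u->gc_candidate)	/* queued after GC began: skip */
			continue;
		hit = 1;
		func(u);
	}
	return hit;
}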
@@ -249,24 +260,29 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
-/* The external entry point: unix_gc() */
+static bool gc_in_progress = false;
 
-void unix_gc(void)
+void wait_for_unix_gc(void)
 {
-	static bool gc_in_progress = false;
+	wait_event(unix_gc_wait, gc_in_progress == false);
+}
 
+/* The external entry point: unix_gc() */
+void unix_gc(void)
+{
 	struct unix_sock *u;
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
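This hunk promotes the old function-local static to a file-scope flag and pairs it with the new wait queue: wait_for_unix_gc(), called from both sendmsg paths in af_unix.c above, parks senders while a collection runs, and unix_gc() wakes them when it finishes (the wake_up() in the final hunk). The userspace sketch below models that throttle with a pthread condition variable standing in for the kernel wait queue; it is an analogy, not the kernel implementation.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gc_cond = PTHREAD_COND_INITIALIZER;
static bool gc_in_progress;

/* Sender side: block until no collection is running. */
static void wait_for_gc(void)
{
	pthread_mutex_lock(&gc_lock);
	while (gc_in_progress)
		pthread_cond_wait(&gc_cond, &gc_lock);
	pthread_mutex_unlock(&gc_lock);
}

/* Collector side: mark a cycle as running. */
static void gc_begin(void)
{
	pthread_mutex_lock(&gc_lock);
	gc_in_progress = true;
	pthread_mutex_unlock(&gc_lock);
}

/* Collector side: finish and wake all blocked senders. */
static void gc_end(void)
{
	pthread_mutex_lock(&gc_lock);
	gc_in_progress = false;
	pthread_cond_broadcast(&gc_cond);	/* like wake_up(&unix_gc_wait) */
	pthread_mutex_unlock(&gc_lock);
}

Note that wait_event() re-checks its condition after every wakeup, which is exactly what the while loop around pthread_cond_wait() reproduces here.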
@@ -282,10 +298,14 @@ void unix_gc(void)
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference. This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference. Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues. Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +319,7 @@ void unix_gc(void)
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,14 +346,24 @@ void unix_gc(void)
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
 	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle. Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
+	/*
 	 * Now gc_candidates contains only garbage. Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).
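Taken together, gc_candidate and the new gc_maybe_cycle implement a two-phase partition: first, every socket whose references are all in flight becomes a candidate; then candidates with references not explained by other candidates' queues are parked on not_cycle_list and restored, leaving only true cycles on gc_candidates. The self-contained model below walks those phases on canned data. The inflight_from_candidates field is a shortcut for what the kernel derives by scanning candidates' receive queues, and all names here are illustrative.

#include <stdio.h>

struct msock {
	const char *name;
	long total_refs;		/* file reference count          */
	long inflight;			/* refs held by queued messages  */
	long inflight_from_candidates;	/* of those, refs held by
					 * messages queued on candidates */
	int gc_candidate;
	int gc_maybe_cycle;
};

int main(void)
{
	struct msock s[] = {
		/* a and b carry each other in flight: a pure cycle   */
		{ "a", 1, 1, 1, 0, 0 },
		{ "b", 1, 1, 1, 0, 0 },
		/* c is in flight but also has an external reference  */
		{ "c", 2, 1, 0, 0, 0 },
		/* d is in flight only on a non-candidate's queue     */
		{ "d", 1, 1, 0, 0, 0 },
	};
	int n = sizeof(s) / sizeof(s[0]);

	/* Phase 1: all references in flight -> candidate, maybe a cycle. */
	for (int i = 0; i < n; i++)
		if (s[i].inflight && s[i].inflight == s[i].total_refs)
			s[i].gc_candidate = s[i].gc_maybe_cycle = 1;

	/* Phase 2: in-flight refs not explained by candidate queues mean
	 * the socket is reachable -> park it on "not_cycle_list".
	 */
	for (int i = 0; i < n; i++)
		if (s[i].gc_maybe_cycle &&
		    s[i].inflight - s[i].inflight_from_candidates > 0)
			s[i].gc_maybe_cycle = 0;

	/* Phase 3: restore non-cycle sockets; the rest is garbage. */
	for (int i = 0; i < n; i++) {
		if (s[i].gc_candidate && !s[i].gc_maybe_cycle) {
			s[i].gc_candidate = 0;
			printf("%s: restored to inflight list\n", s[i].name);
		} else if (s[i].gc_candidate) {
			printf("%s: unreachable cycle, purge\n", s[i].name);
		}
	}
	return 0;
}

Running this prints that a and b are purged as an unreachable cycle while d, reachable through a message queued on a non-candidate, is restored, which is the behavior the not_cycle_list hunk above introduces.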
@@ -351,6 +382,7 @@ void unix_gc(void)
 	/* All candidates should have been detached by now. */
 	BUG_ON(!list_empty(&gc_candidates));
 	gc_in_progress = false;
+	wake_up(&unix_gc_wait);
 
 out:
 	spin_unlock(&unix_gc_lock);
