diff options
author | NeilBrown <neilb@suse.de> | 2007-02-08 17:20:30 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-09 12:25:47 -0500 |
commit | aaf68cfbf2241d24d46583423f6bff5c47e088b3 (patch) | |
tree | 65ca14b85d28b12da097d7d187cebfef88b5ba3a | |
parent | 387bb17374c5fa057462d00d4ba941d49f45de4d (diff) |
[PATCH] knfsd: fix a race in closing NFSd connections
If you lose this race, it can iput a socket inode twice and you get a BUG
in fs/inode.c
When I added the option for user-space to close a socket, I added some
cruft to svc_delete_socket so that I could call that function when closing
a socket per user-space request.
This was the wrong thing to do. I should have just set SK_CLOSE and let
normal mechanisms do the work.
Not only wrong, but buggy. The locking is all wrong and it openned up a
race where-by a socket could be closed twice.
So this patch:
Introduces svc_close_socket which sets SK_CLOSE then either leave
the close up to a thread, or calls svc_delete_socket if it can
get SK_BUSY.
Adds a bias to sk_busy which is removed when SK_DEAD is set,
This avoid races around shutting down the socket.
Changes several 'spin_lock' to 'spin_lock_bh' where the _bh
was missing.
Bugzilla-url: http://bugzilla.kernel.org/show_bug.cgi?id=7916
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/sunrpc/svcsock.h | 2 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 4 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 52 |
3 files changed, 41 insertions, 17 deletions
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 98b21ad370fd..db312a1e2eeb 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h | |||
@@ -63,7 +63,7 @@ struct svc_sock { | |||
63 | * Function prototypes. | 63 | * Function prototypes. |
64 | */ | 64 | */ |
65 | int svc_makesock(struct svc_serv *, int, unsigned short); | 65 | int svc_makesock(struct svc_serv *, int, unsigned short); |
66 | void svc_delete_socket(struct svc_sock *); | 66 | void svc_close_socket(struct svc_sock *); |
67 | int svc_recv(struct svc_rqst *, long); | 67 | int svc_recv(struct svc_rqst *, long); |
68 | int svc_send(struct svc_rqst *); | 68 | int svc_send(struct svc_rqst *); |
69 | void svc_drop(struct svc_rqst *); | 69 | void svc_drop(struct svc_rqst *); |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4c1611211119..c1f878131ac6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -386,7 +386,7 @@ svc_destroy(struct svc_serv *serv) | |||
386 | svsk = list_entry(serv->sv_tempsocks.next, | 386 | svsk = list_entry(serv->sv_tempsocks.next, |
387 | struct svc_sock, | 387 | struct svc_sock, |
388 | sk_list); | 388 | sk_list); |
389 | svc_delete_socket(svsk); | 389 | svc_close_socket(svsk); |
390 | } | 390 | } |
391 | if (serv->sv_shutdown) | 391 | if (serv->sv_shutdown) |
392 | serv->sv_shutdown(serv); | 392 | serv->sv_shutdown(serv); |
@@ -395,7 +395,7 @@ svc_destroy(struct svc_serv *serv) | |||
395 | svsk = list_entry(serv->sv_permsocks.next, | 395 | svsk = list_entry(serv->sv_permsocks.next, |
396 | struct svc_sock, | 396 | struct svc_sock, |
397 | sk_list); | 397 | sk_list); |
398 | svc_delete_socket(svsk); | 398 | svc_close_socket(svsk); |
399 | } | 399 | } |
400 | 400 | ||
401 | cache_clean_deferred(serv); | 401 | cache_clean_deferred(serv); |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ff1f8bf680aa..cf93cd1d857b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -62,6 +62,12 @@ | |||
62 | * after a clear, the socket must be read/accepted | 62 | * after a clear, the socket must be read/accepted |
63 | * if this succeeds, it must be set again. | 63 | * if this succeeds, it must be set again. |
64 | * SK_CLOSE can set at any time. It is never cleared. | 64 | * SK_CLOSE can set at any time. It is never cleared. |
65 | * sk_inuse contains a bias of '1' until SK_DEAD is set. | ||
66 | * so when sk_inuse hits zero, we know the socket is dead | ||
67 | * and no-one is using it. | ||
68 | * SK_DEAD can only be set while SK_BUSY is held which ensures | ||
69 | * no other thread will be using the socket or will try to | ||
70 | * set SK_DEAD. | ||
65 | * | 71 | * |
66 | */ | 72 | */ |
67 | 73 | ||
@@ -70,6 +76,7 @@ | |||
70 | 76 | ||
71 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | 77 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
72 | int *errp, int pmap_reg); | 78 | int *errp, int pmap_reg); |
79 | static void svc_delete_socket(struct svc_sock *svsk); | ||
73 | static void svc_udp_data_ready(struct sock *, int); | 80 | static void svc_udp_data_ready(struct sock *, int); |
74 | static int svc_udp_recvfrom(struct svc_rqst *); | 81 | static int svc_udp_recvfrom(struct svc_rqst *); |
75 | static int svc_udp_sendto(struct svc_rqst *); | 82 | static int svc_udp_sendto(struct svc_rqst *); |
@@ -329,8 +336,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space) | |||
329 | static inline void | 336 | static inline void |
330 | svc_sock_put(struct svc_sock *svsk) | 337 | svc_sock_put(struct svc_sock *svsk) |
331 | { | 338 | { |
332 | if (atomic_dec_and_test(&svsk->sk_inuse) && | 339 | if (atomic_dec_and_test(&svsk->sk_inuse)) { |
333 | test_bit(SK_DEAD, &svsk->sk_flags)) { | 340 | BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); |
341 | |||
334 | dprintk("svc: releasing dead socket\n"); | 342 | dprintk("svc: releasing dead socket\n"); |
335 | if (svsk->sk_sock->file) | 343 | if (svsk->sk_sock->file) |
336 | sockfd_put(svsk->sk_sock); | 344 | sockfd_put(svsk->sk_sock); |
@@ -520,7 +528,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
520 | 528 | ||
521 | if (!serv) | 529 | if (!serv) |
522 | return 0; | 530 | return 0; |
523 | spin_lock(&serv->sv_lock); | 531 | spin_lock_bh(&serv->sv_lock); |
524 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { | 532 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { |
525 | int onelen = one_sock_name(buf+len, svsk); | 533 | int onelen = one_sock_name(buf+len, svsk); |
526 | if (toclose && strcmp(toclose, buf+len) == 0) | 534 | if (toclose && strcmp(toclose, buf+len) == 0) |
@@ -528,12 +536,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
528 | else | 536 | else |
529 | len += onelen; | 537 | len += onelen; |
530 | } | 538 | } |
531 | spin_unlock(&serv->sv_lock); | 539 | spin_unlock_bh(&serv->sv_lock); |
532 | if (closesk) | 540 | if (closesk) |
533 | /* Should unregister with portmap, but you cannot | 541 | /* Should unregister with portmap, but you cannot |
534 | * unregister just one protocol... | 542 | * unregister just one protocol... |
535 | */ | 543 | */ |
536 | svc_delete_socket(closesk); | 544 | svc_close_socket(closesk); |
537 | else if (toclose) | 545 | else if (toclose) |
538 | return -ENOENT; | 546 | return -ENOENT; |
539 | return len; | 547 | return len; |
@@ -683,6 +691,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
683 | return svc_deferred_recv(rqstp); | 691 | return svc_deferred_recv(rqstp); |
684 | } | 692 | } |
685 | 693 | ||
694 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
695 | svc_delete_socket(svsk); | ||
696 | return 0; | ||
697 | } | ||
698 | |||
686 | clear_bit(SK_DATA, &svsk->sk_flags); | 699 | clear_bit(SK_DATA, &svsk->sk_flags); |
687 | while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { | 700 | while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { |
688 | if (err == -EAGAIN) { | 701 | if (err == -EAGAIN) { |
@@ -1176,7 +1189,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp) | |||
1176 | rqstp->rq_sock->sk_server->sv_name, | 1189 | rqstp->rq_sock->sk_server->sv_name, |
1177 | (sent<0)?"got error":"sent only", | 1190 | (sent<0)?"got error":"sent only", |
1178 | sent, xbufp->len); | 1191 | sent, xbufp->len); |
1179 | svc_delete_socket(rqstp->rq_sock); | 1192 | set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); |
1193 | svc_sock_enqueue(rqstp->rq_sock); | ||
1180 | sent = -EAGAIN; | 1194 | sent = -EAGAIN; |
1181 | } | 1195 | } |
1182 | return sent; | 1196 | return sent; |
@@ -1495,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, | |||
1495 | svsk->sk_odata = inet->sk_data_ready; | 1509 | svsk->sk_odata = inet->sk_data_ready; |
1496 | svsk->sk_owspace = inet->sk_write_space; | 1510 | svsk->sk_owspace = inet->sk_write_space; |
1497 | svsk->sk_server = serv; | 1511 | svsk->sk_server = serv; |
1498 | atomic_set(&svsk->sk_inuse, 0); | 1512 | atomic_set(&svsk->sk_inuse, 1); |
1499 | svsk->sk_lastrecv = get_seconds(); | 1513 | svsk->sk_lastrecv = get_seconds(); |
1500 | spin_lock_init(&svsk->sk_defer_lock); | 1514 | spin_lock_init(&svsk->sk_defer_lock); |
1501 | INIT_LIST_HEAD(&svsk->sk_deferred); | 1515 | INIT_LIST_HEAD(&svsk->sk_deferred); |
@@ -1618,7 +1632,7 @@ bummer: | |||
1618 | /* | 1632 | /* |
1619 | * Remove a dead socket | 1633 | * Remove a dead socket |
1620 | */ | 1634 | */ |
1621 | void | 1635 | static void |
1622 | svc_delete_socket(struct svc_sock *svsk) | 1636 | svc_delete_socket(struct svc_sock *svsk) |
1623 | { | 1637 | { |
1624 | struct svc_serv *serv; | 1638 | struct svc_serv *serv; |
@@ -1644,16 +1658,26 @@ svc_delete_socket(struct svc_sock *svsk) | |||
1644 | * while still attached to a queue, the queue itself | 1658 | * while still attached to a queue, the queue itself |
1645 | * is about to be destroyed (in svc_destroy). | 1659 | * is about to be destroyed (in svc_destroy). |
1646 | */ | 1660 | */ |
1647 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) | 1661 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { |
1662 | BUG_ON(atomic_read(&svsk->sk_inuse)<2); | ||
1663 | atomic_dec(&svsk->sk_inuse); | ||
1648 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | 1664 | if (test_bit(SK_TEMP, &svsk->sk_flags)) |
1649 | serv->sv_tmpcnt--; | 1665 | serv->sv_tmpcnt--; |
1666 | } | ||
1650 | 1667 | ||
1651 | /* This atomic_inc should be needed - svc_delete_socket | ||
1652 | * should have the semantic of dropping a reference. | ||
1653 | * But it doesn't yet.... | ||
1654 | */ | ||
1655 | atomic_inc(&svsk->sk_inuse); | ||
1656 | spin_unlock_bh(&serv->sv_lock); | 1668 | spin_unlock_bh(&serv->sv_lock); |
1669 | } | ||
1670 | |||
1671 | void svc_close_socket(struct svc_sock *svsk) | ||
1672 | { | ||
1673 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1674 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) | ||
1675 | /* someone else will have to effect the close */ | ||
1676 | return; | ||
1677 | |||
1678 | atomic_inc(&svsk->sk_inuse); | ||
1679 | svc_delete_socket(svsk); | ||
1680 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1657 | svc_sock_put(svsk); | 1681 | svc_sock_put(svsk); |
1658 | } | 1682 | } |
1659 | 1683 | ||