diff options
author | Tom Tucker <tom@opengridcomputing.com> | 2008-03-11 14:31:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-03-12 15:37:34 -0400 |
commit | c48cbb405c4f338ce3263c44d621eff41d9a95fc (patch) | |
tree | 9b40ce571009d8acae6b1023b5e50247d3f33d15 /net | |
parent | ee27a558ae0ff5063ccd6c47ca102c0bb0e3ba27 (diff) |
SVCRDMA: Add xprt refs to fix close/unmount crash
RDMA connection shutdown on an SMP machine can cause a kernel crash due
to the transport close path racing with the I/O tasklet.
Additional transport references were added as follows:
- A reference held while the transport is on the DTO queue, so that the
transport cannot be deleted while it is queued for I/O.
- A reference while there is a QP able to generate events.
- A reference held for the CM ID from the ESTABLISHED event until the DISCONNECTED event is received.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 96 |
1 files changed, 58 insertions, 38 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f09444c451b..16fd3f6718f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
54 | int flags); | 54 | int flags); |
55 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); | 55 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); |
56 | static void svc_rdma_release_rqst(struct svc_rqst *); | 56 | static void svc_rdma_release_rqst(struct svc_rqst *); |
57 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt); | ||
58 | static void dto_tasklet_func(unsigned long data); | 57 | static void dto_tasklet_func(unsigned long data); |
59 | static void svc_rdma_detach(struct svc_xprt *xprt); | 58 | static void svc_rdma_detach(struct svc_xprt *xprt); |
60 | static void svc_rdma_free(struct svc_xprt *xprt); | 59 | static void svc_rdma_free(struct svc_xprt *xprt); |
@@ -247,6 +246,7 @@ static void dto_tasklet_func(unsigned long data) | |||
247 | sq_cq_reap(xprt); | 246 | sq_cq_reap(xprt); |
248 | } | 247 | } |
249 | 248 | ||
249 | svc_xprt_put(&xprt->sc_xprt); | ||
250 | spin_lock_irqsave(&dto_lock, flags); | 250 | spin_lock_irqsave(&dto_lock, flags); |
251 | } | 251 | } |
252 | spin_unlock_irqrestore(&dto_lock, flags); | 252 | spin_unlock_irqrestore(&dto_lock, flags); |
@@ -275,8 +275,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) | |||
275 | * add it | 275 | * add it |
276 | */ | 276 | */ |
277 | spin_lock_irqsave(&dto_lock, flags); | 277 | spin_lock_irqsave(&dto_lock, flags); |
278 | if (list_empty(&xprt->sc_dto_q)) | 278 | if (list_empty(&xprt->sc_dto_q)) { |
279 | svc_xprt_get(&xprt->sc_xprt); | ||
279 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | 280 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); |
281 | } | ||
280 | spin_unlock_irqrestore(&dto_lock, flags); | 282 | spin_unlock_irqrestore(&dto_lock, flags); |
281 | 283 | ||
282 | /* Tasklet does all the work to avoid irqsave locks. */ | 284 | /* Tasklet does all the work to avoid irqsave locks. */ |
@@ -386,8 +388,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) | |||
386 | * add it | 388 | * add it |
387 | */ | 389 | */ |
388 | spin_lock_irqsave(&dto_lock, flags); | 390 | spin_lock_irqsave(&dto_lock, flags); |
389 | if (list_empty(&xprt->sc_dto_q)) | 391 | if (list_empty(&xprt->sc_dto_q)) { |
392 | svc_xprt_get(&xprt->sc_xprt); | ||
390 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | 393 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); |
394 | } | ||
391 | spin_unlock_irqrestore(&dto_lock, flags); | 395 | spin_unlock_irqrestore(&dto_lock, flags); |
392 | 396 | ||
393 | /* Tasklet does all the work to avoid irqsave locks. */ | 397 | /* Tasklet does all the work to avoid irqsave locks. */ |
@@ -611,6 +615,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, | |||
611 | switch (event->event) { | 615 | switch (event->event) { |
612 | case RDMA_CM_EVENT_ESTABLISHED: | 616 | case RDMA_CM_EVENT_ESTABLISHED: |
613 | /* Accept complete */ | 617 | /* Accept complete */ |
618 | svc_xprt_get(xprt); | ||
614 | dprintk("svcrdma: Connection completed on DTO xprt=%p, " | 619 | dprintk("svcrdma: Connection completed on DTO xprt=%p, " |
615 | "cm_id=%p\n", xprt, cma_id); | 620 | "cm_id=%p\n", xprt, cma_id); |
616 | clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); | 621 | clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); |
@@ -661,15 +666,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
661 | 666 | ||
662 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); | 667 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); |
663 | if (IS_ERR(listen_id)) { | 668 | if (IS_ERR(listen_id)) { |
664 | rdma_destroy_xprt(cma_xprt); | 669 | svc_xprt_put(&cma_xprt->sc_xprt); |
665 | dprintk("svcrdma: rdma_create_id failed = %ld\n", | 670 | dprintk("svcrdma: rdma_create_id failed = %ld\n", |
666 | PTR_ERR(listen_id)); | 671 | PTR_ERR(listen_id)); |
667 | return (void *)listen_id; | 672 | return (void *)listen_id; |
668 | } | 673 | } |
669 | ret = rdma_bind_addr(listen_id, sa); | 674 | ret = rdma_bind_addr(listen_id, sa); |
670 | if (ret) { | 675 | if (ret) { |
671 | rdma_destroy_xprt(cma_xprt); | ||
672 | rdma_destroy_id(listen_id); | 676 | rdma_destroy_id(listen_id); |
677 | svc_xprt_put(&cma_xprt->sc_xprt); | ||
673 | dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); | 678 | dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); |
674 | return ERR_PTR(ret); | 679 | return ERR_PTR(ret); |
675 | } | 680 | } |
@@ -678,8 +683,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
678 | ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); | 683 | ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); |
679 | if (ret) { | 684 | if (ret) { |
680 | rdma_destroy_id(listen_id); | 685 | rdma_destroy_id(listen_id); |
681 | rdma_destroy_xprt(cma_xprt); | 686 | svc_xprt_put(&cma_xprt->sc_xprt); |
682 | dprintk("svcrdma: rdma_listen failed = %d\n", ret); | 687 | dprintk("svcrdma: rdma_listen failed = %d\n", ret); |
688 | return ERR_PTR(ret); | ||
683 | } | 689 | } |
684 | 690 | ||
685 | /* | 691 | /* |
@@ -820,6 +826,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
820 | newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; | 826 | newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; |
821 | newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; | 827 | newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; |
822 | } | 828 | } |
829 | svc_xprt_get(&newxprt->sc_xprt); | ||
823 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | 830 | newxprt->sc_qp = newxprt->sc_cm_id->qp; |
824 | 831 | ||
825 | /* Register all of physical memory */ | 832 | /* Register all of physical memory */ |
@@ -891,8 +898,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
891 | 898 | ||
892 | errout: | 899 | errout: |
893 | dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); | 900 | dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); |
901 | /* Take a reference in case the DTO handler runs */ | ||
902 | svc_xprt_get(&newxprt->sc_xprt); | ||
903 | if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { | ||
904 | ib_destroy_qp(newxprt->sc_qp); | ||
905 | svc_xprt_put(&newxprt->sc_xprt); | ||
906 | } | ||
894 | rdma_destroy_id(newxprt->sc_cm_id); | 907 | rdma_destroy_id(newxprt->sc_cm_id); |
895 | rdma_destroy_xprt(newxprt); | 908 | /* This call to put will destroy the transport */ |
909 | svc_xprt_put(&newxprt->sc_xprt); | ||
896 | return NULL; | 910 | return NULL; |
897 | } | 911 | } |
898 | 912 | ||
@@ -919,54 +933,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) | |||
919 | rqstp->rq_xprt_ctxt = NULL; | 933 | rqstp->rq_xprt_ctxt = NULL; |
920 | } | 934 | } |
921 | 935 | ||
922 | /* Disable data ready events for this connection */ | 936 | /* |
937 | * When connected, an svc_xprt has at least three references: | ||
938 | * | ||
939 | * - A reference held by the QP. We still hold that here because this | ||
940 | * code deletes the QP and puts the reference. | ||
941 | * | ||
942 | * - A reference held by the cm_id between the ESTABLISHED and | ||
943 | * DISCONNECTED events. If the remote peer disconnected first, this | ||
944 | * reference could be gone. | ||
945 | * | ||
946 | * - A reference held by the svc_recv code that called this function | ||
947 | * as part of close processing. | ||
948 | * | ||
949 | * At a minimum two references should still be held. | ||
950 | */ | ||
923 | static void svc_rdma_detach(struct svc_xprt *xprt) | 951 | static void svc_rdma_detach(struct svc_xprt *xprt) |
924 | { | 952 | { |
925 | struct svcxprt_rdma *rdma = | 953 | struct svcxprt_rdma *rdma = |
926 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | 954 | container_of(xprt, struct svcxprt_rdma, sc_xprt); |
927 | unsigned long flags; | ||
928 | |||
929 | dprintk("svc: svc_rdma_detach(%p)\n", xprt); | 955 | dprintk("svc: svc_rdma_detach(%p)\n", xprt); |
930 | /* | 956 | |
931 | * Shutdown the connection. This will ensure we don't get any | 957 | /* Disconnect and flush posted WQE */ |
932 | * more events from the provider. | ||
933 | */ | ||
934 | rdma_disconnect(rdma->sc_cm_id); | 958 | rdma_disconnect(rdma->sc_cm_id); |
935 | rdma_destroy_id(rdma->sc_cm_id); | ||
936 | 959 | ||
937 | /* We may already be on the DTO list */ | 960 | /* Destroy the QP if present (not a listener) */ |
938 | spin_lock_irqsave(&dto_lock, flags); | 961 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) { |
939 | if (!list_empty(&rdma->sc_dto_q)) | 962 | ib_destroy_qp(rdma->sc_qp); |
940 | list_del_init(&rdma->sc_dto_q); | 963 | svc_xprt_put(xprt); |
941 | spin_unlock_irqrestore(&dto_lock, flags); | 964 | } |
965 | |||
966 | /* Destroy the CM ID */ | ||
967 | rdma_destroy_id(rdma->sc_cm_id); | ||
942 | } | 968 | } |
943 | 969 | ||
944 | static void svc_rdma_free(struct svc_xprt *xprt) | 970 | static void svc_rdma_free(struct svc_xprt *xprt) |
945 | { | 971 | { |
946 | struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; | 972 | struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; |
947 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); | 973 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); |
948 | rdma_destroy_xprt(rdma); | 974 | /* We should only be called from kref_put */ |
949 | kfree(rdma); | 975 | BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); |
950 | } | 976 | if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) |
951 | 977 | ib_destroy_cq(rdma->sc_sq_cq); | |
952 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt) | ||
953 | { | ||
954 | if (xprt->sc_qp && !IS_ERR(xprt->sc_qp)) | ||
955 | ib_destroy_qp(xprt->sc_qp); | ||
956 | |||
957 | if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq)) | ||
958 | ib_destroy_cq(xprt->sc_sq_cq); | ||
959 | 978 | ||
960 | if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq)) | 979 | if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) |
961 | ib_destroy_cq(xprt->sc_rq_cq); | 980 | ib_destroy_cq(rdma->sc_rq_cq); |
962 | 981 | ||
963 | if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr)) | 982 | if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr)) |
964 | ib_dereg_mr(xprt->sc_phys_mr); | 983 | ib_dereg_mr(rdma->sc_phys_mr); |
965 | 984 | ||
966 | if (xprt->sc_pd && !IS_ERR(xprt->sc_pd)) | 985 | if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) |
967 | ib_dealloc_pd(xprt->sc_pd); | 986 | ib_dealloc_pd(rdma->sc_pd); |
968 | 987 | ||
969 | destroy_context_cache(xprt->sc_ctxt_head); | 988 | destroy_context_cache(rdma->sc_ctxt_head); |
989 | kfree(rdma); | ||
970 | } | 990 | } |
971 | 991 | ||
972 | static int svc_rdma_has_wspace(struct svc_xprt *xprt) | 992 | static int svc_rdma_has_wspace(struct svc_xprt *xprt) |