diff options
author | Zhen Wei <zwei@novell.com> | 2007-01-23 20:19:59 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-02-07 15:15:11 -0500 |
commit | 925037bcba7691db2403684141a276930ad184f3 (patch) | |
tree | 5a928f3d3f8488d1094a4ced8f39228c9d5a8ca9 /fs | |
parent | f71aa8a55a0ae1a0d06c6079265d16502a678e8e (diff) |
ocfs2: introduce sc->sc_send_lock to protect outbound messages
When there is a lot of multithreaded I/O usage, two threads can collide
while sending out a message to the other nodes. This is due to the lack of
locking between threads while sending out the messages.
When a connected TCP send(), sendto(), or sendmsg() arrives in the Linux
kernel, it eventually comes through tcp_sendmsg(). tcp_sendmsg() protects
itself by acquiring a lock at invocation by calling lock_sock().
tcp_sendmsg() then loops over the buffers in the iovec, allocating
associated sk_buff's and cache pages for use in the actual send. As it does
so, it pushes the data out to tcp for actual transmission. However, if one
of those allocations fails (because a large number of large sends is being
processed, for example), it must wait for memory to become available. It
does so by jumping to wait_for_sndbuf or wait_for_memory, both of which
eventually cause a call to sk_stream_wait_memory(). sk_stream_wait_memory()
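contains a code path that calls sk_wait_event(). Finally, sk_wait_event()
contains the call to release_sock(). Once the socket lock has been released
in the middle of a send, a second thread can enter tcp_sendmsg() on the same
socket and interleave its data with the first thread's partially sent message.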
The following patch adds a lock to the socket container in order to
properly serialize outbound requests.
From: Zhen Wei <zwei@novell.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 8 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 2 |
2 files changed, 10 insertions, 0 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2021aec7cbbd..1718215fc018 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -556,6 +556,8 @@ static void o2net_register_callbacks(struct sock *sk, | |||
556 | sk->sk_data_ready = o2net_data_ready; | 556 | sk->sk_data_ready = o2net_data_ready; |
557 | sk->sk_state_change = o2net_state_change; | 557 | sk->sk_state_change = o2net_state_change; |
558 | 558 | ||
559 | mutex_init(&sc->sc_send_lock); | ||
560 | |||
559 | write_unlock_bh(&sk->sk_callback_lock); | 561 | write_unlock_bh(&sk->sk_callback_lock); |
560 | } | 562 | } |
561 | 563 | ||
@@ -858,10 +860,12 @@ static void o2net_sendpage(struct o2net_sock_container *sc, | |||
858 | ssize_t ret; | 860 | ssize_t ret; |
859 | 861 | ||
860 | 862 | ||
863 | mutex_lock(&sc->sc_send_lock); | ||
861 | ret = sc->sc_sock->ops->sendpage(sc->sc_sock, | 864 | ret = sc->sc_sock->ops->sendpage(sc->sc_sock, |
862 | virt_to_page(kmalloced_virt), | 865 | virt_to_page(kmalloced_virt), |
863 | (long)kmalloced_virt & ~PAGE_MASK, | 866 | (long)kmalloced_virt & ~PAGE_MASK, |
864 | size, MSG_DONTWAIT); | 867 | size, MSG_DONTWAIT); |
868 | mutex_unlock(&sc->sc_send_lock); | ||
865 | if (ret != size) { | 869 | if (ret != size) { |
866 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT | 870 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT |
867 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); | 871 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); |
@@ -976,8 +980,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
976 | 980 | ||
977 | /* finally, convert the message header to network byte-order | 981 | /* finally, convert the message header to network byte-order |
978 | * and send */ | 982 | * and send */ |
983 | mutex_lock(&sc->sc_send_lock); | ||
979 | ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, | 984 | ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, |
980 | sizeof(struct o2net_msg) + caller_bytes); | 985 | sizeof(struct o2net_msg) + caller_bytes); |
986 | mutex_unlock(&sc->sc_send_lock); | ||
981 | msglog(msg, "sending returned %d\n", ret); | 987 | msglog(msg, "sending returned %d\n", ret); |
982 | if (ret < 0) { | 988 | if (ret < 0) { |
983 | mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); | 989 | mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); |
@@ -1109,8 +1115,10 @@ static int o2net_process_message(struct o2net_sock_container *sc, | |||
1109 | 1115 | ||
1110 | out_respond: | 1116 | out_respond: |
1111 | /* this destroys the hdr, so don't use it after this */ | 1117 | /* this destroys the hdr, so don't use it after this */ |
1118 | mutex_lock(&sc->sc_send_lock); | ||
1112 | ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, | 1119 | ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, |
1113 | handler_status); | 1120 | handler_status); |
1121 | mutex_unlock(&sc->sc_send_lock); | ||
1114 | hdr = NULL; | 1122 | hdr = NULL; |
1115 | mlog(0, "sending handler status %d, syserr %d returned %d\n", | 1123 | mlog(0, "sending handler status %d, syserr %d returned %d\n", |
1116 | handler_status, syserr, ret); | 1124 | handler_status, syserr, ret); |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 177927a8f007..4dae5df5e467 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -155,6 +155,8 @@ struct o2net_sock_container { | |||
155 | struct timeval sc_tv_func_stop; | 155 | struct timeval sc_tv_func_stop; |
156 | u32 sc_msg_key; | 156 | u32 sc_msg_key; |
157 | u16 sc_msg_type; | 157 | u16 sc_msg_type; |
158 | |||
159 | struct mutex sc_send_lock; | ||
158 | }; | 160 | }; |
159 | 161 | ||
160 | struct o2net_msg_handler { | 162 | struct o2net_msg_handler { |