aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2008-10-28 20:47:57 -0400
committerSteve French <sfrench@us.ibm.com>2008-10-28 20:47:57 -0400
commitedf1ae403896cb7750800508b14996ba6be39a53 (patch)
treeff792ea77e558d473a9f6515397728d31e73fd09
parent49fdf6785fd660e18a1eb4588928f47e9fa29a9a (diff)
[CIFS] Reduce number of socket retries in large write path
In some heavy stress conditions cifs could get EAGAIN repeatedly in smb_send2, which led to repeated retries and eventually failure of large writes, which in turn could lead to data corruption. There are three changes that were suggested by various network developers: 1) convert cifs from non-blocking to blocking tcp sendmsg (we left in the retry on failure) 2) change cifs to not set sendbuf and rcvbuf size for the socket (let tcp autotune the buffer sizes since that works much better in the TCP stack now) 3) if we have a partial frame sent in smb_send2, mark the tcp session as invalid (close the socket and reconnect) so we do not corrupt the remaining part of the SMB with the beginning of the next SMB. This does not appear to hurt performance measurably and has been run in various scenarios, but it definitely removes a corruption that we were seeing in some high stress test cases. Acked-by: Shirish Pargaonkar <shirishp@us.ibm.com> Signed-off-by: Steve French <sfrench@us.ibm.com>
-rw-r--r--fs/cifs/CHANGES6
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/connect.c50
-rw-r--r--fs/cifs/transport.c41
5 files changed, 76 insertions, 25 deletions
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8f528ea24c48..8855331b2fba 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,11 @@ Various fixes to make delete of open files behavior more predictable
4(when delete of an open file fails we mark the file as "delete-on-close" 4(when delete of an open file fails we mark the file as "delete-on-close"
5in a way that more servers accept, but only if we can first rename the 5in a way that more servers accept, but only if we can first rename the
6file to a temporary name). Add experimental support for more safely 6file to a temporary name). Add experimental support for more safely
7handling fcntl(F_SETLEASE). 7handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
8sends, and also let tcp autotune the socket send and receive buffers.
9This reduces the number of EAGAIN errors returned by TCP/IP in
10high stress workloads (and the number of retries on socket writes
11when sending large SMBWriteX requests).
8 12
9Version 1.54 13Version 1.54
10------------ 14------------
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c791e5b5a914..1cb1189f24e0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -141,6 +141,8 @@ struct TCP_Server_Info {
141 char versionMajor; 141 char versionMajor;
142 char versionMinor; 142 char versionMinor;
143 bool svlocal:1; /* local server or remote */ 143 bool svlocal:1; /* local server or remote */
144 bool noblocksnd; /* use non-blocking sendmsg (MSG_DONTWAIT) when set */
145 bool noautotune; /* do not autotune send buf sizes */
144 atomic_t socketUseCount; /* number of open cifs sessions on socket */ 146 atomic_t socketUseCount; /* number of open cifs sessions on socket */
145 atomic_t inFlight; /* number of requests on the wire to server */ 147 atomic_t inFlight; /* number of requests on the wire to server */
146#ifdef CONFIG_CIFS_STATS2 148#ifdef CONFIG_CIFS_STATS2
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 0cff7fe986e8..6f21ecb85ce5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,7 +36,7 @@ extern void cifs_buf_release(void *);
36extern struct smb_hdr *cifs_small_buf_get(void); 36extern struct smb_hdr *cifs_small_buf_get(void);
37extern void cifs_small_buf_release(void *); 37extern void cifs_small_buf_release(void *);
38extern int smb_send(struct socket *, struct smb_hdr *, 38extern int smb_send(struct socket *, struct smb_hdr *,
39 unsigned int /* length */ , struct sockaddr *); 39 unsigned int /* length */ , struct sockaddr *, bool);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); 42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid));
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71b7661e2260..e9f9248cb3fe 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -92,6 +92,8 @@ struct smb_vol {
92 bool seal:1; /* request transport encryption on share */ 92 bool seal:1; /* request transport encryption on share */
93 bool nodfs:1; /* Do not request DFS, even if available */ 93 bool nodfs:1; /* Do not request DFS, even if available */
94 bool local_lease:1; /* check leases only on local system, not remote */ 94 bool local_lease:1; /* check leases only on local system, not remote */
95 bool noblocksnd:1;
96 bool noautotune:1;
95 unsigned int rsize; 97 unsigned int rsize;
96 unsigned int wsize; 98 unsigned int wsize;
97 unsigned int sockopt; 99 unsigned int sockopt;
@@ -102,9 +104,11 @@ struct smb_vol {
102static int ipv4_connect(struct sockaddr_in *psin_server, 104static int ipv4_connect(struct sockaddr_in *psin_server,
103 struct socket **csocket, 105 struct socket **csocket,
104 char *netb_name, 106 char *netb_name,
105 char *server_netb_name); 107 char *server_netb_name,
108 bool noblocksnd,
109 bool nosndbuf); /* ipv6 never set sndbuf size */
106static int ipv6_connect(struct sockaddr_in6 *psin_server, 110static int ipv6_connect(struct sockaddr_in6 *psin_server,
107 struct socket **csocket); 111 struct socket **csocket, bool noblocksnd);
108 112
109 113
110 /* 114 /*
@@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server)
191 try_to_freeze(); 195 try_to_freeze();
192 if (server->protocolType == IPV6) { 196 if (server->protocolType == IPV6) {
193 rc = ipv6_connect(&server->addr.sockAddr6, 197 rc = ipv6_connect(&server->addr.sockAddr6,
194 &server->ssocket); 198 &server->ssocket, server->noautotune);
195 } else { 199 } else {
196 rc = ipv4_connect(&server->addr.sockAddr, 200 rc = ipv4_connect(&server->addr.sockAddr,
197 &server->ssocket, 201 &server->ssocket,
198 server->workstation_RFC1001_name, 202 server->workstation_RFC1001_name,
199 server->server_RFC1001_name); 203 server->server_RFC1001_name,
204 server->noblocksnd, server->noautotune);
200 } 205 }
201 if (rc) { 206 if (rc) {
202 cFYI(1, ("reconnect error %d", rc)); 207 cFYI(1, ("reconnect error %d", rc));
@@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1192 /* ignore */ 1197 /* ignore */
1193 } else if (strnicmp(data, "rw", 2) == 0) { 1198 } else if (strnicmp(data, "rw", 2) == 0) {
1194 vol->rw = true; 1199 vol->rw = true;
1200 } else if (strnicmp(data, "noblocksend", 11) == 0) {
1201 vol->noblocksnd = 1;
1202 } else if (strnicmp(data, "noautotune", 10) == 0) {
1203 vol->noautotune = 1;
1195 } else if ((strnicmp(data, "suid", 4) == 0) || 1204 } else if ((strnicmp(data, "suid", 4) == 0) ||
1196 (strnicmp(data, "nosuid", 6) == 0) || 1205 (strnicmp(data, "nosuid", 6) == 0) ||
1197 (strnicmp(data, "exec", 4) == 0) || 1206 (strnicmp(data, "exec", 4) == 0) ||
@@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length)
1518 1527
1519static int 1528static int
1520ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, 1529ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1521 char *netbios_name, char *target_name) 1530 char *netbios_name, char *target_name,
1531 bool noblocksnd, bool noautotune)
1522{ 1532{
1523 int rc = 0; 1533 int rc = 0;
1524 int connected = 0; 1534 int connected = 0;
@@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1590 (*csocket)->sk->sk_sndbuf, 1600 (*csocket)->sk->sk_sndbuf,
1591 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); 1601 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo));
1592 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1602 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1603 if (!noblocksnd)
1604 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1605
1593 /* make the bufsizes depend on wsize/rsize and max requests */ 1606 /* make the bufsizes depend on wsize/rsize and max requests */
1594 if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) 1607 if (noautotune) {
1595 (*csocket)->sk->sk_sndbuf = 200 * 1024; 1608 if ((*csocket)->sk->sk_sndbuf < (200 * 1024))
1596 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) 1609 (*csocket)->sk->sk_sndbuf = 200 * 1024;
1597 (*csocket)->sk->sk_rcvbuf = 140 * 1024; 1610 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024))
1611 (*csocket)->sk->sk_rcvbuf = 140 * 1024;
1612 }
1598 1613
1599 /* send RFC1001 sessinit */ 1614 /* send RFC1001 sessinit */
1600 if (psin_server->sin_port == htons(RFC1001_PORT)) { 1615 if (psin_server->sin_port == htons(RFC1001_PORT)) {
@@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1631 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 1646 /* sizeof RFC1002_SESSION_REQUEST with no scope */
1632 smb_buf->smb_buf_length = 0x81000044; 1647 smb_buf->smb_buf_length = 0x81000044;
1633 rc = smb_send(*csocket, smb_buf, 0x44, 1648 rc = smb_send(*csocket, smb_buf, 0x44,
1634 (struct sockaddr *)psin_server); 1649 (struct sockaddr *)psin_server, noblocksnd);
1635 kfree(ses_init_buf); 1650 kfree(ses_init_buf);
1636 msleep(1); /* RFC1001 layer in at least one server 1651 msleep(1); /* RFC1001 layer in at least one server
1637 requires very short break before negprot 1652 requires very short break before negprot
@@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1651} 1666}
1652 1667
1653static int 1668static int
1654ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) 1669ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket,
1670 bool noblocksnd)
1655{ 1671{
1656 int rc = 0; 1672 int rc = 0;
1657 int connected = 0; 1673 int connected = 0;
@@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1720 the default. sock_setsockopt not used because it expects 1736 the default. sock_setsockopt not used because it expects
1721 user space buffer */ 1737 user space buffer */
1722 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1738 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1739 if (!noblocksnd)
1740 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1741
1723 1742
1724 return rc; 1743 return rc;
1725} 1744}
@@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1983 cFYI(1, ("attempting ipv6 connect")); 2002 cFYI(1, ("attempting ipv6 connect"));
1984 /* BB should we allow ipv6 on port 139? */ 2003 /* BB should we allow ipv6 on port 139? */
1985 /* other OS never observed in Wild doing 139 with v6 */ 2004 /* other OS never observed in Wild doing 139 with v6 */
1986 rc = ipv6_connect(&sin_server6, &csocket); 2005 rc = ipv6_connect(&sin_server6, &csocket,
2006 volume_info.noblocksnd);
1987 } else 2007 } else
1988 rc = ipv4_connect(&sin_server, &csocket, 2008 rc = ipv4_connect(&sin_server, &csocket,
1989 volume_info.source_rfc1001_name, 2009 volume_info.source_rfc1001_name,
1990 volume_info.target_rfc1001_name); 2010 volume_info.target_rfc1001_name,
2011 volume_info.noblocksnd,
2012 volume_info.noautotune);
1991 if (rc < 0) { 2013 if (rc < 0) {
1992 cERROR(1, ("Error connecting to IPv4 socket. " 2014 cERROR(1, ("Error connecting to IPv4 socket. "
1993 "Aborting operation")); 2015 "Aborting operation"));
@@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2002 sock_release(csocket); 2024 sock_release(csocket);
2003 goto out; 2025 goto out;
2004 } else { 2026 } else {
2027 srvTcp->noblocksnd = volume_info.noblocksnd;
2028 srvTcp->noautotune = volume_info.noautotune;
2005 memcpy(&srvTcp->addr.sockAddr, &sin_server, 2029 memcpy(&srvTcp->addr.sockAddr, &sin_server,
2006 sizeof(struct sockaddr_in)); 2030 sizeof(struct sockaddr_in));
2007 atomic_set(&srvTcp->inFlight, 0); 2031 atomic_set(&srvTcp->inFlight, 0);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bf0e6d8e382a..ba4d66644ebf 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
161 161
162int 162int
163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, 163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
164 unsigned int smb_buf_length, struct sockaddr *sin) 164 unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd)
165{ 165{
166 int rc = 0; 166 int rc = 0;
167 int i = 0; 167 int i = 0;
@@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
178 smb_msg.msg_namelen = sizeof(struct sockaddr); 178 smb_msg.msg_namelen = sizeof(struct sockaddr);
179 smb_msg.msg_control = NULL; 179 smb_msg.msg_control = NULL;
180 smb_msg.msg_controllen = 0; 180 smb_msg.msg_controllen = 0;
181 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 181 if (noblocksnd)
182 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
183 else
184 smb_msg.msg_flags = MSG_NOSIGNAL;
182 185
183 /* smb header is converted in header_assemble. bcc and rest of SMB word 186 /* smb header is converted in header_assemble. bcc and rest of SMB word
184 area, and byte area if necessary, is converted to littleendian in 187 area, and byte area if necessary, is converted to littleendian in
@@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
229} 232}
230 233
231static int 234static int
232smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, 235smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
233 struct sockaddr *sin) 236 struct sockaddr *sin, bool noblocksnd)
234{ 237{
235 int rc = 0; 238 int rc = 0;
236 int i = 0; 239 int i = 0;
@@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
240 unsigned int total_len; 243 unsigned int total_len;
241 int first_vec = 0; 244 int first_vec = 0;
242 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 245 unsigned int smb_buf_length = smb_buffer->smb_buf_length;
246 struct socket *ssocket = server->ssocket;
243 247
244 if (ssocket == NULL) 248 if (ssocket == NULL)
245 return -ENOTSOCK; /* BB eventually add reconnect code here */ 249 return -ENOTSOCK; /* BB eventually add reconnect code here */
@@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
248 smb_msg.msg_namelen = sizeof(struct sockaddr); 252 smb_msg.msg_namelen = sizeof(struct sockaddr);
249 smb_msg.msg_control = NULL; 253 smb_msg.msg_control = NULL;
250 smb_msg.msg_controllen = 0; 254 smb_msg.msg_controllen = 0;
251 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 255 if (noblocksnd)
256 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
257 else
258 smb_msg.msg_flags = MSG_NOSIGNAL;
252 259
253 /* smb header is converted in header_assemble. bcc and rest of SMB word 260 /* smb header is converted in header_assemble. bcc and rest of SMB word
254 area, and byte area if necessary, is converted to littleendian in 261 area, and byte area if necessary, is converted to littleendian in
@@ -312,6 +319,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
312 i = 0; /* in case we get ENOSPC on the next send */ 319 i = 0; /* in case we get ENOSPC on the next send */
313 } 320 }
314 321
322 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
323 cFYI(1, ("partial send (%d remaining), terminating session",
324 total_len));
325 /* If we have only sent part of an SMB then the next SMB
326 could be taken as the remainder of this one. We need
327 to kill the socket so the server throws away the partial
328 SMB */
329 server->tcpStatus = CifsNeedReconnect;
330 }
331
315 if (rc < 0) { 332 if (rc < 0) {
316 cERROR(1, ("Error %d sending data on socket to server", rc)); 333 cERROR(1, ("Error %d sending data on socket to server", rc));
317 } else 334 } else
@@ -518,8 +535,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
518#ifdef CONFIG_CIFS_STATS2 535#ifdef CONFIG_CIFS_STATS2
519 atomic_inc(&ses->server->inSend); 536 atomic_inc(&ses->server->inSend);
520#endif 537#endif
521 rc = smb_send2(ses->server->ssocket, iov, n_vec, 538 rc = smb_send2(ses->server, iov, n_vec,
522 (struct sockaddr *) &(ses->server->addr.sockAddr)); 539 (struct sockaddr *) &(ses->server->addr.sockAddr),
540 ses->server->noblocksnd);
523#ifdef CONFIG_CIFS_STATS2 541#ifdef CONFIG_CIFS_STATS2
524 atomic_dec(&ses->server->inSend); 542 atomic_dec(&ses->server->inSend);
525 midQ->when_sent = jiffies; 543 midQ->when_sent = jiffies;
@@ -711,7 +729,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
711 atomic_inc(&ses->server->inSend); 729 atomic_inc(&ses->server->inSend);
712#endif 730#endif
713 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 731 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
714 (struct sockaddr *) &(ses->server->addr.sockAddr)); 732 (struct sockaddr *) &(ses->server->addr.sockAddr),
733 ses->server->noblocksnd);
715#ifdef CONFIG_CIFS_STATS2 734#ifdef CONFIG_CIFS_STATS2
716 atomic_dec(&ses->server->inSend); 735 atomic_dec(&ses->server->inSend);
717 midQ->when_sent = jiffies; 736 midQ->when_sent = jiffies;
@@ -851,7 +870,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
851 return rc; 870 return rc;
852 } 871 }
853 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 872 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
854 (struct sockaddr *) &(ses->server->addr.sockAddr)); 873 (struct sockaddr *) &(ses->server->addr.sockAddr),
874 ses->server->noblocksnd);
855 up(&ses->server->tcpSem); 875 up(&ses->server->tcpSem);
856 return rc; 876 return rc;
857} 877}
@@ -941,7 +961,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
941 atomic_inc(&ses->server->inSend); 961 atomic_inc(&ses->server->inSend);
942#endif 962#endif
943 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 963 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
944 (struct sockaddr *) &(ses->server->addr.sockAddr)); 964 (struct sockaddr *) &(ses->server->addr.sockAddr),
965 ses->server->noblocksnd);
945#ifdef CONFIG_CIFS_STATS2 966#ifdef CONFIG_CIFS_STATS2
946 atomic_dec(&ses->server->inSend); 967 atomic_dec(&ses->server->inSend);
947 midQ->when_sent = jiffies; 968 midQ->when_sent = jiffies;