diff options
author | Jeff Layton <jlayton@redhat.com> | 2011-01-11 07:24:02 -0500 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2011-01-20 12:07:49 -0500 |
commit | 0ade640e9cda805692dbf688f4bb69e94719275a (patch) | |
tree | 79676b2553e1d7c6a49dd31fb455c0c3ef27b5a0 | |
parent | 2fbc2f1729e785a7b2faf9d8d60926bb1ff62af0 (diff) |
cifs: wait indefinitely for responses
The client should not be timing out on individual SMB requests. Too much
of the state between client and server is tied to the state of the
socket. If we time out requests and issue spurious disconnects then that
compromises data integrity.
Instead of doing this complicated dance where we try to decide how long
to wait for a response for particular requests, have the client instead
wait indefinitely for a response. Also, use a TASK_KILLABLE sleep here
so that fatal signals will break out of this waiting.
Later patches will add support for detecting dead peers and forcing
reconnects based on that.
Reviewed-by: Suresh Jayaraman <sjayaraman@suse.de>
Reviewed-by: Pavel Shilovsky <piastryyy@gmail.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
-rw-r--r-- | fs/cifs/transport.c | 110 |
1 files changed, 17 insertions, 93 deletions
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 15059c7ef2ae..c41c9c4f0a79 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -318,48 +318,17 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, | |||
318 | return 0; | 318 | return 0; |
319 | } | 319 | } |
320 | 320 | ||
321 | static int wait_for_response(struct cifsSesInfo *ses, | 321 | static int |
322 | struct mid_q_entry *midQ, | 322 | wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) |
323 | unsigned long timeout, | ||
324 | unsigned long time_to_wait) | ||
325 | { | 323 | { |
326 | unsigned long curr_timeout; | 324 | int error; |
327 | |||
328 | for (;;) { | ||
329 | curr_timeout = timeout + jiffies; | ||
330 | wait_event_timeout(ses->server->response_q, | ||
331 | midQ->midState != MID_REQUEST_SUBMITTED, timeout); | ||
332 | |||
333 | if (time_after(jiffies, curr_timeout) && | ||
334 | (midQ->midState == MID_REQUEST_SUBMITTED) && | ||
335 | ((ses->server->tcpStatus == CifsGood) || | ||
336 | (ses->server->tcpStatus == CifsNew))) { | ||
337 | |||
338 | unsigned long lrt; | ||
339 | 325 | ||
340 | /* We timed out. Is the server still | 326 | error = wait_event_killable(server->response_q, |
341 | sending replies ? */ | 327 | midQ->midState != MID_REQUEST_SUBMITTED); |
342 | spin_lock(&GlobalMid_Lock); | 328 | if (error < 0) |
343 | lrt = ses->server->lstrp; | 329 | return -ERESTARTSYS; |
344 | spin_unlock(&GlobalMid_Lock); | ||
345 | 330 | ||
346 | /* Calculate time_to_wait past last receive time. | 331 | return 0; |
347 | Although we prefer not to time out if the | ||
348 | server is still responding - we will time | ||
349 | out if the server takes more than 15 (or 45 | ||
350 | or 180) seconds to respond to this request | ||
351 | and has not responded to any request from | ||
352 | other threads on the client within 10 seconds */ | ||
353 | lrt += time_to_wait; | ||
354 | if (time_after(jiffies, lrt)) { | ||
355 | /* No replies for time_to_wait. */ | ||
356 | cERROR(1, "server not responding"); | ||
357 | return -1; | ||
358 | } | ||
359 | } else { | ||
360 | return 0; | ||
361 | } | ||
362 | } | ||
363 | } | 332 | } |
364 | 333 | ||
365 | 334 | ||
@@ -433,7 +402,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
433 | int rc = 0; | 402 | int rc = 0; |
434 | int long_op; | 403 | int long_op; |
435 | unsigned int receive_len; | 404 | unsigned int receive_len; |
436 | unsigned long timeout; | ||
437 | struct mid_q_entry *midQ; | 405 | struct mid_q_entry *midQ; |
438 | struct smb_hdr *in_buf = iov[0].iov_base; | 406 | struct smb_hdr *in_buf = iov[0].iov_base; |
439 | 407 | ||
@@ -500,33 +468,12 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
500 | if (rc < 0) | 468 | if (rc < 0) |
501 | goto out; | 469 | goto out; |
502 | 470 | ||
503 | if (long_op == CIFS_STD_OP) | 471 | if (long_op == CIFS_ASYNC_OP) |
504 | timeout = 15 * HZ; | ||
505 | else if (long_op == CIFS_VLONG_OP) /* e.g. slow writes past EOF */ | ||
506 | timeout = 180 * HZ; | ||
507 | else if (long_op == CIFS_LONG_OP) | ||
508 | timeout = 45 * HZ; /* should be greater than | ||
509 | servers oplock break timeout (about 43 seconds) */ | ||
510 | else if (long_op == CIFS_ASYNC_OP) | ||
511 | goto out; | ||
512 | else if (long_op == CIFS_BLOCKING_OP) | ||
513 | timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */ | ||
514 | else { | ||
515 | cERROR(1, "unknown timeout flag %d", long_op); | ||
516 | rc = -EIO; | ||
517 | goto out; | 472 | goto out; |
518 | } | ||
519 | |||
520 | /* wait for 15 seconds or until woken up due to response arriving or | ||
521 | due to last connection to this server being unmounted */ | ||
522 | if (signal_pending(current)) { | ||
523 | /* if signal pending do not hold up user for full smb timeout | ||
524 | but we still give response a chance to complete */ | ||
525 | timeout = 2 * HZ; | ||
526 | } | ||
527 | 473 | ||
528 | /* No user interrupts in wait - wreaks havoc with performance */ | 474 | rc = wait_for_response(ses->server, midQ); |
529 | wait_for_response(ses, midQ, timeout, 10 * HZ); | 475 | if (rc != 0) |
476 | goto out; | ||
530 | 477 | ||
531 | rc = sync_mid_result(midQ, ses->server); | 478 | rc = sync_mid_result(midQ, ses->server); |
532 | if (rc != 0) { | 479 | if (rc != 0) { |
@@ -604,7 +551,6 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
604 | { | 551 | { |
605 | int rc = 0; | 552 | int rc = 0; |
606 | unsigned int receive_len; | 553 | unsigned int receive_len; |
607 | unsigned long timeout; | ||
608 | struct mid_q_entry *midQ; | 554 | struct mid_q_entry *midQ; |
609 | 555 | ||
610 | if (ses == NULL) { | 556 | if (ses == NULL) { |
@@ -668,33 +614,12 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
668 | if (rc < 0) | 614 | if (rc < 0) |
669 | goto out; | 615 | goto out; |
670 | 616 | ||
671 | if (long_op == CIFS_STD_OP) | 617 | if (long_op == CIFS_ASYNC_OP) |
672 | timeout = 15 * HZ; | ||
673 | /* wait for 15 seconds or until woken up due to response arriving or | ||
674 | due to last connection to this server being unmounted */ | ||
675 | else if (long_op == CIFS_ASYNC_OP) | ||
676 | goto out; | ||
677 | else if (long_op == CIFS_VLONG_OP) /* writes past EOF can be slow */ | ||
678 | timeout = 180 * HZ; | ||
679 | else if (long_op == CIFS_LONG_OP) | ||
680 | timeout = 45 * HZ; /* should be greater than | ||
681 | servers oplock break timeout (about 43 seconds) */ | ||
682 | else if (long_op == CIFS_BLOCKING_OP) | ||
683 | timeout = 0x7FFFFFFF; /* large but no so large as to wrap */ | ||
684 | else { | ||
685 | cERROR(1, "unknown timeout flag %d", long_op); | ||
686 | rc = -EIO; | ||
687 | goto out; | 618 | goto out; |
688 | } | ||
689 | 619 | ||
690 | if (signal_pending(current)) { | 620 | rc = wait_for_response(ses->server, midQ); |
691 | /* if signal pending do not hold up user for full smb timeout | 621 | if (rc != 0) |
692 | but we still give response a chance to complete */ | 622 | goto out; |
693 | timeout = 2 * HZ; | ||
694 | } | ||
695 | |||
696 | /* No user interrupts in wait - wreaks havoc with performance */ | ||
697 | wait_for_response(ses, midQ, timeout, 10 * HZ); | ||
698 | 623 | ||
699 | rc = sync_mid_result(midQ, ses->server); | 624 | rc = sync_mid_result(midQ, ses->server); |
700 | if (rc != 0) { | 625 | if (rc != 0) { |
@@ -915,8 +840,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
915 | } | 840 | } |
916 | } | 841 | } |
917 | 842 | ||
918 | /* Wait 5 seconds for the response. */ | 843 | if (wait_for_response(ses->server, midQ) == 0) { |
919 | if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ) == 0) { | ||
920 | /* We got the response - restart system call. */ | 844 | /* We got the response - restart system call. */ |
921 | rstart = 1; | 845 | rstart = 1; |
922 | } | 846 | } |