aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
authorMarkus Pargmann <mpa@pengutronix.de>2015-08-17 02:20:00 -0400
committerJens Axboe <axboe@fb.com>2015-08-17 10:22:47 -0400
commit7e2893a16d3e71035a38122a77bc55848a29f0e4 (patch)
treebc3b030306610519a7781dafefb2298821664a5a /drivers/block/nbd.c
parentc45f5c9943ce0b16b299b543c2aae12408039027 (diff)
nbd: Fix timeout detection
At the moment the nbd timeout just detects hanging TCP operations. This is not enough to detect a hanging or bad connection as expected of a timeout. This patch redesigns the timeout detection to include some more cases. The timeout is now in relation to replies from the server. If the server does not send replies within the timeout the connection will be shut down. The patch adds a continuous timer 'timeout_timer' that is set up in one of two cases: - The request list is empty and we are sending the first request out to the server. We want to have a reply within the given timeout, otherwise we consider the connection to be dead. - A server response was received. This means the server is still communicating with us. The timer is reset to the timeout value. The timer is not stopped if the list becomes empty. It will just trigger a timeout which will directly leave the handling routine again as the request list is empty. The whole patch does not use any additional explicit locking. The list_empty() calls are safe to be used concurrently. The timer is locked internally as we just use mod_timer and del_timer_sync(). The patch is based on the idea of Michal Belczyk with a previous different implementation. Cc: Michal Belczyk <belczyk@bsd.krakow.pl> Cc: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de> Signed-off-by: Markus Pargmann <mpa@pengutronix.de> Tested-by: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c98
1 files changed, 70 insertions, 28 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f169faf9838a..f3536e68e63f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -59,6 +59,10 @@ struct nbd_device {
59 pid_t pid; /* pid of nbd-client, if attached */ 59 pid_t pid; /* pid of nbd-client, if attached */
60 int xmit_timeout; 60 int xmit_timeout;
61 int disconnect; /* a disconnect has been requested by user */ 61 int disconnect; /* a disconnect has been requested by user */
62
63 struct timer_list timeout_timer;
64 struct task_struct *task_recv;
65 struct task_struct *task_send;
62}; 66};
63 67
64#define NBD_MAGIC 0x68797548 68#define NBD_MAGIC 0x68797548
@@ -121,6 +125,7 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
121 dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); 125 dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
122 kernel_sock_shutdown(nbd->sock, SHUT_RDWR); 126 kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
123 nbd->sock = NULL; 127 nbd->sock = NULL;
128 del_timer_sync(&nbd->timeout_timer);
124 } 129 }
125 if (lock) 130 if (lock)
126 mutex_unlock(&nbd->tx_lock); 131 mutex_unlock(&nbd->tx_lock);
@@ -128,11 +133,23 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
128 133
129static void nbd_xmit_timeout(unsigned long arg) 134static void nbd_xmit_timeout(unsigned long arg)
130{ 135{
131 struct task_struct *task = (struct task_struct *)arg; 136 struct nbd_device *nbd = (struct nbd_device *)arg;
137 struct task_struct *task;
138
139 if (list_empty(&nbd->queue_head))
140 return;
141
142 nbd->disconnect = 1;
143
144 task = READ_ONCE(nbd->task_recv);
145 if (task)
146 force_sig(SIGKILL, task);
132 147
133 printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n", 148 task = READ_ONCE(nbd->task_send);
134 task->comm, task->pid); 149 if (task)
135 force_sig(SIGKILL, task); 150 force_sig(SIGKILL, nbd->task_send);
151
152 dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
136} 153}
137 154
138/* 155/*
@@ -171,33 +188,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
171 msg.msg_controllen = 0; 188 msg.msg_controllen = 0;
172 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 189 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
173 190
174 if (send) { 191 if (send)
175 struct timer_list ti;
176
177 if (nbd->xmit_timeout) {
178 init_timer(&ti);
179 ti.function = nbd_xmit_timeout;
180 ti.data = (unsigned long)current;
181 ti.expires = jiffies + nbd->xmit_timeout;
182 add_timer(&ti);
183 }
184 result = kernel_sendmsg(sock, &msg, &iov, 1, size); 192 result = kernel_sendmsg(sock, &msg, &iov, 1, size);
185 if (nbd->xmit_timeout) 193 else
186 del_timer_sync(&ti);
187 } else
188 result = kernel_recvmsg(sock, &msg, &iov, 1, size, 194 result = kernel_recvmsg(sock, &msg, &iov, 1, size,
189 msg.msg_flags); 195 msg.msg_flags);
190 196
191 if (signal_pending(current)) {
192 siginfo_t info;
193 printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
194 task_pid_nr(current), current->comm,
195 dequeue_signal_lock(current, &current->blocked, &info));
196 result = -EINTR;
197 sock_shutdown(nbd, !send);
198 break;
199 }
200
201 if (result <= 0) { 197 if (result <= 0) {
202 if (result == 0) 198 if (result == 0)
203 result = -EPIPE; /* short read */ 199 result = -EPIPE; /* short read */
@@ -210,6 +206,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
210 sigprocmask(SIG_SETMASK, &oldset, NULL); 206 sigprocmask(SIG_SETMASK, &oldset, NULL);
211 tsk_restore_flags(current, pflags, PF_MEMALLOC); 207 tsk_restore_flags(current, pflags, PF_MEMALLOC);
212 208
209 if (!send && nbd->xmit_timeout)
210 mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
211
213 return result; 212 return result;
214} 213}
215 214
@@ -415,12 +414,26 @@ static int nbd_do_it(struct nbd_device *nbd)
415 return ret; 414 return ret;
416 } 415 }
417 416
417 nbd->task_recv = current;
418
418 while ((req = nbd_read_stat(nbd)) != NULL) 419 while ((req = nbd_read_stat(nbd)) != NULL)
419 nbd_end_request(nbd, req); 420 nbd_end_request(nbd, req);
420 421
422 nbd->task_recv = NULL;
423
424 if (signal_pending(current)) {
425 siginfo_t info;
426
427 ret = dequeue_signal_lock(current, &current->blocked, &info);
428 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
429 task_pid_nr(current), current->comm, ret);
430 sock_shutdown(nbd, 1);
431 ret = -ETIMEDOUT;
432 }
433
421 device_remove_file(disk_to_dev(nbd->disk), &pid_attr); 434 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
422 nbd->pid = 0; 435 nbd->pid = 0;
423 return 0; 436 return ret;
424} 437}
425 438
426static void nbd_clear_que(struct nbd_device *nbd) 439static void nbd_clear_que(struct nbd_device *nbd)
@@ -482,6 +495,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
482 495
483 nbd->active_req = req; 496 nbd->active_req = req;
484 497
498 if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
499 mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
500
485 if (nbd_send_req(nbd, req) != 0) { 501 if (nbd_send_req(nbd, req) != 0) {
486 dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); 502 dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
487 req->errors++; 503 req->errors++;
@@ -508,6 +524,8 @@ static int nbd_thread(void *data)
508 struct nbd_device *nbd = data; 524 struct nbd_device *nbd = data;
509 struct request *req; 525 struct request *req;
510 526
527 nbd->task_send = current;
528
511 set_user_nice(current, MIN_NICE); 529 set_user_nice(current, MIN_NICE);
512 while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { 530 while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
513 /* wait for something to do */ 531 /* wait for something to do */
@@ -515,6 +533,18 @@ static int nbd_thread(void *data)
515 kthread_should_stop() || 533 kthread_should_stop() ||
516 !list_empty(&nbd->waiting_queue)); 534 !list_empty(&nbd->waiting_queue));
517 535
536 if (signal_pending(current)) {
537 siginfo_t info;
538 int ret;
539
540 ret = dequeue_signal_lock(current, &current->blocked,
541 &info);
542 dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
543 task_pid_nr(current), current->comm, ret);
544 sock_shutdown(nbd, 1);
545 break;
546 }
547
518 /* extract request */ 548 /* extract request */
519 if (list_empty(&nbd->waiting_queue)) 549 if (list_empty(&nbd->waiting_queue))
520 continue; 550 continue;
@@ -528,6 +558,9 @@ static int nbd_thread(void *data)
528 /* handle request */ 558 /* handle request */
529 nbd_handle_req(nbd, req); 559 nbd_handle_req(nbd, req);
530 } 560 }
561
562 nbd->task_send = NULL;
563
531 return 0; 564 return 0;
532} 565}
533 566
@@ -648,6 +681,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
648 681
649 case NBD_SET_TIMEOUT: 682 case NBD_SET_TIMEOUT:
650 nbd->xmit_timeout = arg * HZ; 683 nbd->xmit_timeout = arg * HZ;
684 if (arg)
685 mod_timer(&nbd->timeout_timer,
686 jiffies + nbd->xmit_timeout);
687 else
688 del_timer_sync(&nbd->timeout_timer);
689
651 return 0; 690 return 0;
652 691
653 case NBD_SET_FLAGS: 692 case NBD_SET_FLAGS:
@@ -842,6 +881,9 @@ static int __init nbd_init(void)
842 spin_lock_init(&nbd_dev[i].queue_lock); 881 spin_lock_init(&nbd_dev[i].queue_lock);
843 INIT_LIST_HEAD(&nbd_dev[i].queue_head); 882 INIT_LIST_HEAD(&nbd_dev[i].queue_head);
844 mutex_init(&nbd_dev[i].tx_lock); 883 mutex_init(&nbd_dev[i].tx_lock);
884 init_timer(&nbd_dev[i].timeout_timer);
885 nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
886 nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
845 init_waitqueue_head(&nbd_dev[i].active_wq); 887 init_waitqueue_head(&nbd_dev[i].active_wq);
846 init_waitqueue_head(&nbd_dev[i].waiting_wq); 888 init_waitqueue_head(&nbd_dev[i].waiting_wq);
847 nbd_dev[i].blksize = 1024; 889 nbd_dev[i].blksize = 1024;