aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Clements <paul.clements@steeleye.com> 2007-10-17 02:27:37 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-17 11:42:55 -0400
commit: 7fdfd4065c264bddd2d9277470a6a99d34e01bef (patch)
tree: e2423a0338bf5d48f24bca1091e3069508f45f01
parent: 4b86a872561ad052bdc6f092a06807822d26beb1 (diff)
NBD: allow hung network I/O to be cancelled
Allow NBD I/O to be cancelled when a network outage occurs. Previously, I/O would just hang, and if enough I/O was hung in nbd, the system (at least user-level) would completely hang until a TCP timeout (default, 15 minutes) occurred.

The patch introduces a new ioctl NBD_SET_TIMEOUT that allows a transmit timeout value (in seconds) to be specified. Any network send that exceeds the timeout will be cancelled and the nbd connection will be shut down. I've tested with various timeout values and 6 seconds seems to be a good choice for the timeout. If the NBD_SET_TIMEOUT ioctl is not called, you get the old (I/O hang) behavior.

Signed-off-by: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/block/nbd.c 93
-rw-r--r-- include/linux/nbd.h 2
2 files changed, 64 insertions, 31 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 89bdafd88dbd..cb136a919f2a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -113,12 +113,42 @@ static void nbd_end_request(struct request *req)
113 spin_unlock_irqrestore(q->queue_lock, flags); 113 spin_unlock_irqrestore(q->queue_lock, flags);
114} 114}
115 115
116static void sock_shutdown(struct nbd_device *lo, int lock)
117{
118 /* Forcibly shutdown the socket causing all listeners
119 * to error
120 *
121 * FIXME: This code is duplicated from sys_shutdown, but
122 * there should be a more generic interface rather than
123 * calling socket ops directly here */
124 if (lock)
125 mutex_lock(&lo->tx_lock);
126 if (lo->sock) {
127 printk(KERN_WARNING "%s: shutting down socket\n",
128 lo->disk->disk_name);
129 lo->sock->ops->shutdown(lo->sock, SEND_SHUTDOWN|RCV_SHUTDOWN);
130 lo->sock = NULL;
131 }
132 if (lock)
133 mutex_unlock(&lo->tx_lock);
134}
135
136static void nbd_xmit_timeout(unsigned long arg)
137{
138 struct task_struct *task = (struct task_struct *)arg;
139
140 printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
141 task->comm, task->pid);
142 force_sig(SIGKILL, task);
143}
144
116/* 145/*
117 * Send or receive packet. 146 * Send or receive packet.
118 */ 147 */
119static int sock_xmit(struct socket *sock, int send, void *buf, int size, 148static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
120 int msg_flags) 149 int msg_flags)
121{ 150{
151 struct socket *sock = lo->sock;
122 int result; 152 int result;
123 struct msghdr msg; 153 struct msghdr msg;
124 struct kvec iov; 154 struct kvec iov;
@@ -139,9 +169,20 @@ static int sock_xmit(struct socket *sock, int send, void *buf, int size,
139 msg.msg_controllen = 0; 169 msg.msg_controllen = 0;
140 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 170 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
141 171
142 if (send) 172 if (send) {
173 struct timer_list ti;
174
175 if (lo->xmit_timeout) {
176 init_timer(&ti);
177 ti.function = nbd_xmit_timeout;
178 ti.data = (unsigned long)current;
179 ti.expires = jiffies + lo->xmit_timeout;
180 add_timer(&ti);
181 }
143 result = kernel_sendmsg(sock, &msg, &iov, 1, size); 182 result = kernel_sendmsg(sock, &msg, &iov, 1, size);
144 else 183 if (lo->xmit_timeout)
184 del_timer_sync(&ti);
185 } else
145 result = kernel_recvmsg(sock, &msg, &iov, 1, size, 0); 186 result = kernel_recvmsg(sock, &msg, &iov, 1, size, 0);
146 187
147 if (signal_pending(current)) { 188 if (signal_pending(current)) {
@@ -150,6 +191,7 @@ static int sock_xmit(struct socket *sock, int send, void *buf, int size,
150 current->pid, current->comm, 191 current->pid, current->comm,
151 dequeue_signal_lock(current, &current->blocked, &info)); 192 dequeue_signal_lock(current, &current->blocked, &info));
152 result = -EINTR; 193 result = -EINTR;
194 sock_shutdown(lo, !send);
153 break; 195 break;
154 } 196 }
155 197
@@ -167,23 +209,22 @@ static int sock_xmit(struct socket *sock, int send, void *buf, int size,
167 return result; 209 return result;
168} 210}
169 211
170static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec, 212static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec,
171 int flags) 213 int flags)
172{ 214{
173 int result; 215 int result;
174 void *kaddr = kmap(bvec->bv_page); 216 void *kaddr = kmap(bvec->bv_page);
175 result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len, 217 result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags);
176 flags);
177 kunmap(bvec->bv_page); 218 kunmap(bvec->bv_page);
178 return result; 219 return result;
179} 220}
180 221
222/* always call with the tx_lock held */
181static int nbd_send_req(struct nbd_device *lo, struct request *req) 223static int nbd_send_req(struct nbd_device *lo, struct request *req)
182{ 224{
183 int result, flags; 225 int result, flags;
184 struct nbd_request request; 226 struct nbd_request request;
185 unsigned long size = req->nr_sectors << 9; 227 unsigned long size = req->nr_sectors << 9;
186 struct socket *sock = lo->sock;
187 228
188 request.magic = htonl(NBD_REQUEST_MAGIC); 229 request.magic = htonl(NBD_REQUEST_MAGIC);
189 request.type = htonl(nbd_cmd(req)); 230 request.type = htonl(nbd_cmd(req));
@@ -196,8 +237,8 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
196 nbdcmd_to_ascii(nbd_cmd(req)), 237 nbdcmd_to_ascii(nbd_cmd(req)),
197 (unsigned long long)req->sector << 9, 238 (unsigned long long)req->sector << 9,
198 req->nr_sectors << 9); 239 req->nr_sectors << 9);
199 result = sock_xmit(sock, 1, &request, sizeof(request), 240 result = sock_xmit(lo, 1, &request, sizeof(request),
200 (nbd_cmd(req) == NBD_CMD_WRITE)? MSG_MORE: 0); 241 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
201 if (result <= 0) { 242 if (result <= 0) {
202 printk(KERN_ERR "%s: Send control failed (result %d)\n", 243 printk(KERN_ERR "%s: Send control failed (result %d)\n",
203 lo->disk->disk_name, result); 244 lo->disk->disk_name, result);
@@ -217,7 +258,7 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
217 flags = MSG_MORE; 258 flags = MSG_MORE;
218 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", 259 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
219 lo->disk->disk_name, req, bvec->bv_len); 260 lo->disk->disk_name, req, bvec->bv_len);
220 result = sock_send_bvec(sock, bvec, flags); 261 result = sock_send_bvec(lo, bvec, flags);
221 if (result <= 0) { 262 if (result <= 0) {
222 printk(KERN_ERR "%s: Send data failed (result %d)\n", 263 printk(KERN_ERR "%s: Send data failed (result %d)\n",
223 lo->disk->disk_name, result); 264 lo->disk->disk_name, result);
@@ -257,11 +298,11 @@ out:
257 return ERR_PTR(err); 298 return ERR_PTR(err);
258} 299}
259 300
260static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec) 301static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec)
261{ 302{
262 int result; 303 int result;
263 void *kaddr = kmap(bvec->bv_page); 304 void *kaddr = kmap(bvec->bv_page);
264 result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len, 305 result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len,
265 MSG_WAITALL); 306 MSG_WAITALL);
266 kunmap(bvec->bv_page); 307 kunmap(bvec->bv_page);
267 return result; 308 return result;
@@ -273,10 +314,9 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
273 int result; 314 int result;
274 struct nbd_reply reply; 315 struct nbd_reply reply;
275 struct request *req; 316 struct request *req;
276 struct socket *sock = lo->sock;
277 317
278 reply.magic = 0; 318 reply.magic = 0;
279 result = sock_xmit(sock, 0, &reply, sizeof(reply), MSG_WAITALL); 319 result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
280 if (result <= 0) { 320 if (result <= 0) {
281 printk(KERN_ERR "%s: Receive control failed (result %d)\n", 321 printk(KERN_ERR "%s: Receive control failed (result %d)\n",
282 lo->disk->disk_name, result); 322 lo->disk->disk_name, result);
@@ -317,7 +357,7 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
317 struct bio_vec *bvec; 357 struct bio_vec *bvec;
318 358
319 rq_for_each_segment(bvec, req, iter) { 359 rq_for_each_segment(bvec, req, iter) {
320 result = sock_recv_bvec(sock, bvec); 360 result = sock_recv_bvec(lo, bvec);
321 if (result <= 0) { 361 if (result <= 0) {
322 printk(KERN_ERR "%s: Receive data failed (result %d)\n", 362 printk(KERN_ERR "%s: Receive data failed (result %d)\n",
323 lo->disk->disk_name, result); 363 lo->disk->disk_name, result);
@@ -392,6 +432,7 @@ static void nbd_clear_que(struct nbd_device *lo)
392 } 432 }
393} 433}
394 434
435
395/* 436/*
396 * We always wait for result of write, for now. It would be nice to make it optional 437 * We always wait for result of write, for now. It would be nice to make it optional
397 * in future 438 * in future
@@ -500,7 +541,9 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
500 sreq.nr_sectors = 0; 541 sreq.nr_sectors = 0;
501 if (!lo->sock) 542 if (!lo->sock)
502 return -EINVAL; 543 return -EINVAL;
544 mutex_lock(&lo->tx_lock);
503 nbd_send_req(lo, &sreq); 545 nbd_send_req(lo, &sreq);
546 mutex_unlock(&lo->tx_lock);
504 return 0; 547 return 0;
505 548
506 case NBD_CLEAR_SOCK: 549 case NBD_CLEAR_SOCK:
@@ -544,6 +587,9 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
544 set_blocksize(inode->i_bdev, lo->blksize); 587 set_blocksize(inode->i_bdev, lo->blksize);
545 set_capacity(lo->disk, lo->bytesize >> 9); 588 set_capacity(lo->disk, lo->bytesize >> 9);
546 return 0; 589 return 0;
590 case NBD_SET_TIMEOUT:
591 lo->xmit_timeout = arg * HZ;
592 return 0;
547 case NBD_SET_SIZE_BLOCKS: 593 case NBD_SET_SIZE_BLOCKS:
548 lo->bytesize = ((u64) arg) * lo->blksize; 594 lo->bytesize = ((u64) arg) * lo->blksize;
549 inode->i_bdev->bd_inode->i_size = lo->bytesize; 595 inode->i_bdev->bd_inode->i_size = lo->bytesize;
@@ -556,22 +602,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
556 error = nbd_do_it(lo); 602 error = nbd_do_it(lo);
557 if (error) 603 if (error)
558 return error; 604 return error;
559 /* on return tidy up in case we have a signal */ 605 sock_shutdown(lo, 1);
560 /* Forcibly shutdown the socket causing all listeners
561 * to error
562 *
563 * FIXME: This code is duplicated from sys_shutdown, but
564 * there should be a more generic interface rather than
565 * calling socket ops directly here */
566 mutex_lock(&lo->tx_lock);
567 if (lo->sock) {
568 printk(KERN_WARNING "%s: shutting down socket\n",
569 lo->disk->disk_name);
570 lo->sock->ops->shutdown(lo->sock,
571 SEND_SHUTDOWN|RCV_SHUTDOWN);
572 lo->sock = NULL;
573 }
574 mutex_unlock(&lo->tx_lock);
575 file = lo->file; 606 file = lo->file;
576 lo->file = NULL; 607 lo->file = NULL;
577 nbd_clear_que(lo); 608 nbd_clear_que(lo);
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index 0f3e69302540..cc2b47240a8f 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -26,6 +26,7 @@
26#define NBD_PRINT_DEBUG _IO( 0xab, 6 ) 26#define NBD_PRINT_DEBUG _IO( 0xab, 6 )
27#define NBD_SET_SIZE_BLOCKS _IO( 0xab, 7 ) 27#define NBD_SET_SIZE_BLOCKS _IO( 0xab, 7 )
28#define NBD_DISCONNECT _IO( 0xab, 8 ) 28#define NBD_DISCONNECT _IO( 0xab, 8 )
29#define NBD_SET_TIMEOUT _IO( 0xab, 9 )
29 30
30enum { 31enum {
31 NBD_CMD_READ = 0, 32 NBD_CMD_READ = 0,
@@ -65,6 +66,7 @@ struct nbd_device {
65 int blksize; 66 int blksize;
66 u64 bytesize; 67 u64 bytesize;
67 pid_t pid; /* pid of nbd-client, if attached */ 68 pid_t pid; /* pid of nbd-client, if attached */
69 int xmit_timeout;
68}; 70};
69 71
70#endif 72#endif