author     Josef Bacik <jbacik@fb.com>    2016-09-08 15:33:37 -0400
committer  Jens Axboe <axboe@fb.com>      2016-09-08 16:01:32 -0400
commit     fd8383fd88a2fd842a9431df5ed353bd7129eecc (patch)
tree       34e886f1fbd2feb47c7adde499e181531baccd42
parent     99e6b87ec2102b10e190c92ea9560bafcb744f86 (diff)
nbd: convert to blkmq
This moves NBD over to using blkmq, which allows us to get rid of the NBD-wide queue lock and the async submit kthread. We will start with 1 hw queue for now, but I plan to add multiple tcp connection support in the future and we'll fix how we set up the hw queues then.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--  drivers/block/nbd.c  337
1 file changed, 129 insertions(+), 208 deletions(-)
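Background note (not part of the commit): the conversion hinges on blk-mq's per-request payload, where the tag set's cmd_size reserves room for a driver structure (struct nbd_cmd here) inside every request, and blk_mq_rq_to_pdu()/blk_mq_rq_from_pdu() convert between the request and that payload. A minimal sketch of the pattern, using hypothetical my_dev/my_cmd/my_queue_rq names rather than anything from this driver, looks like this:

/*
 * Sketch only -- not part of this patch. It illustrates the blk-mq
 * per-command data pattern that the nbd conversion relies on.
 */
#include <linux/blk-mq.h>

struct my_dev;				/* hypothetical driver state */

struct my_cmd {				/* lives in the request PDU */
	struct my_dev *dev;
};

static int my_queue_rq(struct blk_mq_hw_ctx *hctx,
		       const struct blk_mq_queue_data *bd)
{
	/* request -> per-command payload reserved via tag_set.cmd_size */
	struct my_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	/* and back again: payload -> request */
	struct request *req = blk_mq_rq_from_pdu(cmd);

	blk_mq_start_request(req);
	/*
	 * Hand "cmd" to the transport here; the eventual completion goes
	 * through blk_mq_complete_request(req, error), as nbd_end_request()
	 * does in the patch below.
	 */
	return BLK_MQ_RQ_QUEUE_OK;
}

With that payload in place, the patch can also drop the request-pointer handle on the wire: nbd_send_cmd() stores req->tag in the NBD handle, and nbd_read_stat() recovers the request with blk_mq_unique_tag_to_hwq()/blk_mq_tag_to_rq() instead of walking a queue_head list.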
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a9e398019f38..15e7c6740873 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -34,6 +34,7 @@
 #include <linux/kthread.h>
 #include <linux/types.h>
 #include <linux/debugfs.h>
+#include <linux/blk-mq.h>
 
 #include <asm/uaccess.h>
 #include <asm/types.h>
@@ -45,12 +46,8 @@ struct nbd_device {
 	struct socket * sock;	/* If == NULL, device is not ready, yet */
 	int magic;
 
-	spinlock_t queue_lock;
-	struct list_head queue_head;	/* Requests waiting result */
-	struct request *active_req;
-	wait_queue_head_t active_wq;
-	struct list_head waiting_queue;	/* Requests to be sent */
-	wait_queue_head_t waiting_wq;
+	atomic_t outstanding_cmds;
+	struct blk_mq_tag_set tag_set;
 
 	struct mutex tx_lock;
 	struct gendisk *disk;
@@ -71,6 +68,11 @@ struct nbd_device {
 #endif
 };
 
+struct nbd_cmd {
+	struct nbd_device *nbd;
+	struct list_head list;
+};
+
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 static struct dentry *nbd_dbg_dir;
 #endif
@@ -83,18 +85,6 @@ static unsigned int nbds_max = 16;
 static struct nbd_device *nbd_dev;
 static int max_part;
 
-/*
- * Use just one lock (or at most 1 per NIC). Two arguments for this:
- * 1. Each NIC is essentially a synchronization point for all servers
- *    accessed through that NIC so there's no need to have more locks
- *    than NICs anyway.
- * 2. More locks lead to more "Dirty cache line bouncing" which will slow
- *    down each lock to the point where they're actually slower than just
- *    a single lock.
- * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
- */
-static DEFINE_SPINLOCK(nbd_lock);
-
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
@@ -153,18 +143,17 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
 	return 0;
 }
 
-static void nbd_end_request(struct nbd_device *nbd, struct request *req)
+static void nbd_end_request(struct nbd_cmd *cmd)
 {
+	struct nbd_device *nbd = cmd->nbd;
+	struct request *req = blk_mq_rq_from_pdu(cmd);
 	int error = req->errors ? -EIO : 0;
-	struct request_queue *q = req->q;
-	unsigned long flags;
 
-	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req,
+	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
 		error ? "failed" : "done");
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_end_request_all(req, error);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	atomic_dec(&nbd->outstanding_cmds);
+	blk_mq_complete_request(req, error);
 }
 
 /*
@@ -193,7 +182,7 @@ static void nbd_xmit_timeout(unsigned long arg)
 	struct nbd_device *nbd = (struct nbd_device *)arg;
 	unsigned long flags;
 
-	if (list_empty(&nbd->queue_head))
+	if (!atomic_read(&nbd->outstanding_cmds))
 		return;
 
 	spin_lock_irqsave(&nbd->sock_lock, flags);
@@ -273,8 +262,9 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
 }
 
 /* always call with the tx_lock held */
-static int nbd_send_req(struct nbd_device *nbd, struct request *req)
+static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd)
 {
+	struct request *req = blk_mq_rq_from_pdu(cmd);
 	int result, flags;
 	struct nbd_request request;
 	unsigned long size = blk_rq_bytes(req);
@@ -298,10 +288,10 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 		request.len = htonl(size);
 	}
-	memcpy(request.handle, &req, sizeof(req));
+	memcpy(request.handle, &req->tag, sizeof(req->tag));
 
 	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
-		req, nbdcmd_to_ascii(type),
+		cmd, nbdcmd_to_ascii(type),
 		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, 1, &request, sizeof(request),
 			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
@@ -323,7 +313,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 		if (!rq_iter_last(bvec, iter))
 			flags = MSG_MORE;
 		dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
-			req, bvec.bv_len);
+			cmd, bvec.bv_len);
 		result = sock_send_bvec(nbd, &bvec, flags);
 		if (result <= 0) {
 			dev_err(disk_to_dev(nbd->disk),
@@ -336,29 +326,6 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 	return 0;
 }
 
-static struct request *nbd_find_request(struct nbd_device *nbd,
-					struct request *xreq)
-{
-	struct request *req, *tmp;
-	int err;
-
-	err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
-	if (unlikely(err))
-		return ERR_PTR(err);
-
-	spin_lock(&nbd->queue_lock);
-	list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
-		if (req != xreq)
-			continue;
-		list_del_init(&req->queuelist);
-		spin_unlock(&nbd->queue_lock);
-		return req;
-	}
-	spin_unlock(&nbd->queue_lock);
-
-	return ERR_PTR(-ENOENT);
-}
-
 static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
 {
 	int result;
@@ -370,11 +337,14 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
 }
 
 /* NULL returned = something went wrong, inform userspace */
-static struct request *nbd_read_stat(struct nbd_device *nbd)
+static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd)
 {
 	int result;
 	struct nbd_reply reply;
-	struct request *req;
+	struct nbd_cmd *cmd;
+	struct request *req = NULL;
+	u16 hwq;
+	int tag;
 
 	reply.magic = 0;
 	result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
@@ -390,25 +360,27 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 		return ERR_PTR(-EPROTO);
 	}
 
-	req = nbd_find_request(nbd, *(struct request **)reply.handle);
-	if (IS_ERR(req)) {
-		result = PTR_ERR(req);
-		if (result != -ENOENT)
-			return ERR_PTR(result);
+	memcpy(&tag, reply.handle, sizeof(int));
 
-		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
-			reply.handle);
-		return ERR_PTR(-EBADR);
+	hwq = blk_mq_unique_tag_to_hwq(tag);
+	if (hwq < nbd->tag_set.nr_hw_queues)
+		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
+				       blk_mq_unique_tag_to_tag(tag));
+	if (!req || !blk_mq_request_started(req)) {
+		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
+			tag, req);
+		return ERR_PTR(-ENOENT);
 	}
+	cmd = blk_mq_rq_to_pdu(req);
 
 	if (ntohl(reply.error)) {
 		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
 			ntohl(reply.error));
 		req->errors++;
-		return req;
+		return cmd;
 	}
 
-	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
+	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
 	if (rq_data_dir(req) != WRITE) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
@@ -419,13 +391,13 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
 					result);
 				req->errors++;
-				return req;
+				return cmd;
 			}
 			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
-				req, bvec.bv_len);
+				cmd, bvec.bv_len);
 		}
 	}
-	return req;
+	return cmd;
 }
 
 static ssize_t pid_show(struct device *dev,
@@ -444,7 +416,7 @@ static struct device_attribute pid_attr = {
 
 static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 {
-	struct request *req;
+	struct nbd_cmd *cmd;
 	int ret;
 
 	BUG_ON(nbd->magic != NBD_MAGIC);
@@ -460,13 +432,13 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 	nbd_size_update(nbd, bdev);
 
 	while (1) {
-		req = nbd_read_stat(nbd);
-		if (IS_ERR(req)) {
-			ret = PTR_ERR(req);
+		cmd = nbd_read_stat(nbd);
+		if (IS_ERR(cmd)) {
+			ret = PTR_ERR(cmd);
 			break;
 		}
 
-		nbd_end_request(nbd, req);
+		nbd_end_request(cmd);
 	}
 
 	nbd_size_clear(nbd, bdev);
@@ -475,44 +447,37 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
 	return ret;
 }
 
-static void nbd_clear_que(struct nbd_device *nbd)
+static void nbd_clear_req(struct request *req, void *data, bool reserved)
 {
-	struct request *req;
+	struct nbd_cmd *cmd;
 
+	if (!blk_mq_request_started(req))
+		return;
+	cmd = blk_mq_rq_to_pdu(req);
+	req->errors++;
+	nbd_end_request(cmd);
+}
+
+static void nbd_clear_que(struct nbd_device *nbd)
+{
 	BUG_ON(nbd->magic != NBD_MAGIC);
 
 	/*
 	 * Because we have set nbd->sock to NULL under the tx_lock, all
-	 * modifications to the list must have completed by now. For
-	 * the same reason, the active_req must be NULL.
-	 *
-	 * As a consequence, we don't need to take the spin lock while
-	 * purging the list here.
+	 * modifications to the list must have completed by now.
 	 */
 	BUG_ON(nbd->sock);
-	BUG_ON(nbd->active_req);
 
-	while (!list_empty(&nbd->queue_head)) {
-		req = list_entry(nbd->queue_head.next, struct request,
-				 queuelist);
-		list_del_init(&req->queuelist);
-		req->errors++;
-		nbd_end_request(nbd, req);
-	}
-
-	while (!list_empty(&nbd->waiting_queue)) {
-		req = list_entry(nbd->waiting_queue.next, struct request,
-				 queuelist);
-		list_del_init(&req->queuelist);
-		req->errors++;
-		nbd_end_request(nbd, req);
-	}
+	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
 	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 
 
-static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
+static void nbd_handle_cmd(struct nbd_cmd *cmd)
 {
+	struct request *req = blk_mq_rq_from_pdu(cmd);
+	struct nbd_device *nbd = cmd->nbd;
+
 	if (req->cmd_type != REQ_TYPE_FS)
 		goto error_out;
 
@@ -526,6 +491,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 	req->errors = 0;
 
 	mutex_lock(&nbd->tx_lock);
+	nbd->task_send = current;
 	if (unlikely(!nbd->sock)) {
 		mutex_unlock(&nbd->tx_lock);
 		dev_err(disk_to_dev(nbd->disk),
@@ -533,106 +499,34 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 		goto error_out;
 	}
 
-	nbd->active_req = req;
-
-	if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
+	if (nbd->xmit_timeout && !atomic_read(&nbd->outstanding_cmds))
 		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
 
-	if (nbd_send_req(nbd, req) != 0) {
+	atomic_inc(&nbd->outstanding_cmds);
+	if (nbd_send_cmd(nbd, cmd) != 0) {
 		dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
 		req->errors++;
-		nbd_end_request(nbd, req);
-	} else {
-		spin_lock(&nbd->queue_lock);
-		list_add_tail(&req->queuelist, &nbd->queue_head);
-		spin_unlock(&nbd->queue_lock);
+		nbd_end_request(cmd);
 	}
 
-	nbd->active_req = NULL;
+	nbd->task_send = NULL;
 	mutex_unlock(&nbd->tx_lock);
-	wake_up_all(&nbd->active_wq);
 
 	return;
 
 error_out:
 	req->errors++;
-	nbd_end_request(nbd, req);
-}
-
-static int nbd_thread_send(void *data)
-{
-	struct nbd_device *nbd = data;
-	struct request *req;
-
-	nbd->task_send = current;
-
-	set_user_nice(current, MIN_NICE);
-	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
-		/* wait for something to do */
-		wait_event_interruptible(nbd->waiting_wq,
-					 kthread_should_stop() ||
-					 !list_empty(&nbd->waiting_queue));
-
-		/* extract request */
-		if (list_empty(&nbd->waiting_queue))
-			continue;
-
-		spin_lock_irq(&nbd->queue_lock);
-		req = list_entry(nbd->waiting_queue.next, struct request,
-				 queuelist);
-		list_del_init(&req->queuelist);
-		spin_unlock_irq(&nbd->queue_lock);
-
-		/* handle request */
-		nbd_handle_req(nbd, req);
-	}
-
-	nbd->task_send = NULL;
-
-	return 0;
+	nbd_end_request(cmd);
 }
 
-/*
- * We always wait for result of write, for now. It would be nice to make it optional
- * in future
- * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
- * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
- */
-
-static void nbd_request_handler(struct request_queue *q)
-		__releases(q->queue_lock) __acquires(q->queue_lock)
+static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+			const struct blk_mq_queue_data *bd)
 {
-	struct request *req;
-
-	while ((req = blk_fetch_request(q)) != NULL) {
-		struct nbd_device *nbd;
-
-		spin_unlock_irq(q->queue_lock);
-
-		nbd = req->rq_disk->private_data;
-
-		BUG_ON(nbd->magic != NBD_MAGIC);
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
-		dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n",
-			req, req->cmd_type);
-
-		if (unlikely(!nbd->sock)) {
-			dev_err_ratelimited(disk_to_dev(nbd->disk),
-					    "Attempted send on closed socket\n");
-			req->errors++;
-			nbd_end_request(nbd, req);
-			spin_lock_irq(q->queue_lock);
-			continue;
-		}
-
-		spin_lock_irq(&nbd->queue_lock);
-		list_add_tail(&req->queuelist, &nbd->waiting_queue);
-		spin_unlock_irq(&nbd->queue_lock);
-
-		wake_up(&nbd->waiting_wq);
-
-		spin_lock_irq(q->queue_lock);
-	}
+	blk_mq_start_request(bd->rq);
+	nbd_handle_cmd(cmd);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
 static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
@@ -700,33 +594,37 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 {
 	switch (cmd) {
 	case NBD_DISCONNECT: {
-		struct request sreq;
+		struct request *sreq;
 
 		dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
 		if (!nbd->sock)
 			return -EINVAL;
 
+		sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
+		if (!sreq)
+			return -ENOMEM;
+
 		mutex_unlock(&nbd->tx_lock);
 		fsync_bdev(bdev);
 		mutex_lock(&nbd->tx_lock);
-		blk_rq_init(NULL, &sreq);
-		sreq.cmd_type = REQ_TYPE_DRV_PRIV;
+		sreq->cmd_type = REQ_TYPE_DRV_PRIV;
 
 		/* Check again after getting mutex back. */
-		if (!nbd->sock)
+		if (!nbd->sock) {
+			blk_mq_free_request(sreq);
 			return -EINVAL;
+		}
 
 		nbd->disconnect = true;
 
-		nbd_send_req(nbd, &sreq);
+		nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq));
+		blk_mq_free_request(sreq);
 		return 0;
 	}
 
 	case NBD_CLEAR_SOCK:
 		sock_shutdown(nbd);
 		nbd_clear_que(nbd);
-		BUG_ON(!list_empty(&nbd->queue_head));
-		BUG_ON(!list_empty(&nbd->waiting_queue));
 		kill_bdev(bdev);
 		return 0;
 
@@ -772,7 +670,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		return 0;
 
 	case NBD_DO_IT: {
-		struct task_struct *thread;
 		int error;
 
 		if (nbd->task_recv)
@@ -786,18 +683,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
 		nbd_parse_flags(nbd, bdev);
 
-		thread = kthread_run(nbd_thread_send, nbd, "%s",
-				     nbd_name(nbd));
-		if (IS_ERR(thread)) {
-			mutex_lock(&nbd->tx_lock);
-			nbd->task_recv = NULL;
-			return PTR_ERR(thread);
-		}
-
 		nbd_dev_dbg_init(nbd);
 		error = nbd_thread_recv(nbd, bdev);
 		nbd_dev_dbg_close(nbd);
-		kthread_stop(thread);
 
 		mutex_lock(&nbd->tx_lock);
 		nbd->task_recv = NULL;
@@ -825,10 +713,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		return 0;
 
 	case NBD_PRINT_DEBUG:
-		dev_info(disk_to_dev(nbd->disk),
-			"next = %p, prev = %p, head = %p\n",
-			nbd->queue_head.next, nbd->queue_head.prev,
-			&nbd->queue_head);
+		/*
+		 * For compatibility only, we no longer keep a list of
+		 * outstanding requests.
+		 */
 		return 0;
 	}
 	return -ENOTTY;
@@ -987,6 +875,23 @@ static void nbd_dbg_close(void)
 
 #endif
 
+static int nbd_init_request(void *data, struct request *rq,
+			    unsigned int hctx_idx, unsigned int request_idx,
+			    unsigned int numa_node)
+{
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+	cmd->nbd = data;
+	INIT_LIST_HEAD(&cmd->list);
+	return 0;
+}
+
+static struct blk_mq_ops nbd_mq_ops = {
+	.queue_rq	= nbd_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.init_request	= nbd_init_request,
+};
+
 /*
  * And here should be modules and kernel interface
  * (Just smiley confuses emacs :-)
@@ -1035,16 +940,34 @@ static int __init nbd_init(void)
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
+
+		nbd_dev[i].tag_set.ops = &nbd_mq_ops;
+		nbd_dev[i].tag_set.nr_hw_queues = 1;
+		nbd_dev[i].tag_set.queue_depth = 128;
+		nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
+		nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
+		nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+			BLK_MQ_F_SG_MERGE;
+		nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
+
+		err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
+		if (err) {
+			put_disk(disk);
+			goto out;
+		}
+
 		/*
 		 * The new linux 2.5 block layer implementation requires
 		 * every gendisk to have its very own request_queue struct.
 		 * These structs are big so we dynamically allocate them.
 		 */
-		disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
+		disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
 		if (!disk->queue) {
+			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 			put_disk(disk);
 			goto out;
 		}
+
 		/*
 		 * Tell the block layer that we are not a rotational device
 		 */
@@ -1069,16 +992,12 @@ static int __init nbd_init(void)
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
 		nbd_dev[i].magic = NBD_MAGIC;
-		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
-		spin_lock_init(&nbd_dev[i].queue_lock);
 		spin_lock_init(&nbd_dev[i].sock_lock);
-		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
 		mutex_init(&nbd_dev[i].tx_lock);
 		init_timer(&nbd_dev[i].timeout_timer);
 		nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
 		nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
-		init_waitqueue_head(&nbd_dev[i].active_wq);
-		init_waitqueue_head(&nbd_dev[i].waiting_wq);
+		atomic_set(&nbd_dev[i].outstanding_cmds, 0);
 		disk->major = NBD_MAJOR;
 		disk->first_minor = i << part_shift;
 		disk->fops = &nbd_fops;
@@ -1091,6 +1010,7 @@ static int __init nbd_init(void)
 	return 0;
 out:
 	while (i--) {
+		blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 		blk_cleanup_queue(nbd_dev[i].disk->queue);
 		put_disk(nbd_dev[i].disk);
 	}
@@ -1110,6 +1030,7 @@ static void __exit nbd_cleanup(void)
 		if (disk) {
 			del_gendisk(disk);
 			blk_cleanup_queue(disk->queue);
+			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
 			put_disk(disk);
 		}
 	}