diff options
author | Laurent Vivier <Laurent.Vivier@bull.net> | 2008-04-29 04:02:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-29 11:06:23 -0400 |
commit | 48cf6061b30205b29b306bf9bc22dd6f0b091461 (patch) | |
tree | bba7827177587a17160c29392651aa27aa7f98fd | |
parent | 8c4dd6068221cd1d0d90490ace80eb4344914a8c (diff) |
NBD: allow nbd to be used locally
This patch allows a Network Block Device to be mounted locally (nbd-client
connected to nbd-server over 127.0.0.1).
It creates a kthread that sends the queued requests, so that the request
function only has to queue them; this avoids the deadlock described in the NBD
tools documentation. So, if nbd-client hangs waiting for pages, the kblockd
thread can continue its work and free pages.
I have tested the patch to verify that it avoids the hang that always occurs
when writing to a localhost nbd connection. I have also tested to verify that
no performance degradation results from the additional thread and queue.
Patch originally from Laurent Vivier.
Signed-off-by: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/block/nbd.c | 144 | ||||
-rw-r--r-- | include/linux/nbd.h | 4 |
2 files changed, 98 insertions, 50 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 60cc54368b66..8e33de6bea33 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
30 | #include <net/sock.h> | 30 | #include <net/sock.h> |
31 | #include <linux/net.h> | 31 | #include <linux/net.h> |
32 | #include <linux/kthread.h> | ||
32 | 33 | ||
33 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
34 | #include <asm/system.h> | 35 | #include <asm/system.h> |
@@ -441,6 +442,85 @@ static void nbd_clear_que(struct nbd_device *lo) | |||
441 | } | 442 | } |
442 | 443 | ||
443 | 444 | ||
445 | static void nbd_handle_req(struct nbd_device *lo, struct request *req) | ||
446 | { | ||
447 | if (!blk_fs_request(req)) | ||
448 | goto error_out; | ||
449 | |||
450 | nbd_cmd(req) = NBD_CMD_READ; | ||
451 | if (rq_data_dir(req) == WRITE) { | ||
452 | nbd_cmd(req) = NBD_CMD_WRITE; | ||
453 | if (lo->flags & NBD_READ_ONLY) { | ||
454 | printk(KERN_ERR "%s: Write on read-only\n", | ||
455 | lo->disk->disk_name); | ||
456 | goto error_out; | ||
457 | } | ||
458 | } | ||
459 | |||
460 | req->errors = 0; | ||
461 | |||
462 | mutex_lock(&lo->tx_lock); | ||
463 | if (unlikely(!lo->sock)) { | ||
464 | mutex_unlock(&lo->tx_lock); | ||
465 | printk(KERN_ERR "%s: Attempted send on closed socket\n", | ||
466 | lo->disk->disk_name); | ||
467 | req->errors++; | ||
468 | nbd_end_request(req); | ||
469 | return; | ||
470 | } | ||
471 | |||
472 | lo->active_req = req; | ||
473 | |||
474 | if (nbd_send_req(lo, req) != 0) { | ||
475 | printk(KERN_ERR "%s: Request send failed\n", | ||
476 | lo->disk->disk_name); | ||
477 | req->errors++; | ||
478 | nbd_end_request(req); | ||
479 | } else { | ||
480 | spin_lock(&lo->queue_lock); | ||
481 | list_add(&req->queuelist, &lo->queue_head); | ||
482 | spin_unlock(&lo->queue_lock); | ||
483 | } | ||
484 | |||
485 | lo->active_req = NULL; | ||
486 | mutex_unlock(&lo->tx_lock); | ||
487 | wake_up_all(&lo->active_wq); | ||
488 | |||
489 | return; | ||
490 | |||
491 | error_out: | ||
492 | req->errors++; | ||
493 | nbd_end_request(req); | ||
494 | } | ||
495 | |||
496 | static int nbd_thread(void *data) | ||
497 | { | ||
498 | struct nbd_device *lo = data; | ||
499 | struct request *req; | ||
500 | |||
501 | set_user_nice(current, -20); | ||
502 | while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { | ||
503 | /* wait for something to do */ | ||
504 | wait_event_interruptible(lo->waiting_wq, | ||
505 | kthread_should_stop() || | ||
506 | !list_empty(&lo->waiting_queue)); | ||
507 | |||
508 | /* extract request */ | ||
509 | if (list_empty(&lo->waiting_queue)) | ||
510 | continue; | ||
511 | |||
512 | spin_lock_irq(&lo->queue_lock); | ||
513 | req = list_entry(lo->waiting_queue.next, struct request, | ||
514 | queuelist); | ||
515 | list_del_init(&req->queuelist); | ||
516 | spin_unlock_irq(&lo->queue_lock); | ||
517 | |||
518 | /* handle request */ | ||
519 | nbd_handle_req(lo, req); | ||
520 | } | ||
521 | return 0; | ||
522 | } | ||
523 | |||
444 | /* | 524 | /* |
445 | * We always wait for result of write, for now. It would be nice to make it optional | 525 | * We always wait for result of write, for now. It would be nice to make it optional |
446 | * in future | 526 | * in future |
@@ -456,65 +536,23 @@ static void do_nbd_request(struct request_queue * q) | |||
456 | struct nbd_device *lo; | 536 | struct nbd_device *lo; |
457 | 537 | ||
458 | blkdev_dequeue_request(req); | 538 | blkdev_dequeue_request(req); |
539 | |||
540 | spin_unlock_irq(q->queue_lock); | ||
541 | |||
459 | dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", | 542 | dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", |
460 | req->rq_disk->disk_name, req, req->cmd_type); | 543 | req->rq_disk->disk_name, req, req->cmd_type); |
461 | 544 | ||
462 | if (!blk_fs_request(req)) | ||
463 | goto error_out; | ||
464 | |||
465 | lo = req->rq_disk->private_data; | 545 | lo = req->rq_disk->private_data; |
466 | 546 | ||
467 | BUG_ON(lo->magic != LO_MAGIC); | 547 | BUG_ON(lo->magic != LO_MAGIC); |
468 | 548 | ||
469 | nbd_cmd(req) = NBD_CMD_READ; | 549 | spin_lock_irq(&lo->queue_lock); |
470 | if (rq_data_dir(req) == WRITE) { | 550 | list_add_tail(&req->queuelist, &lo->waiting_queue); |
471 | nbd_cmd(req) = NBD_CMD_WRITE; | 551 | spin_unlock_irq(&lo->queue_lock); |
472 | if (lo->flags & NBD_READ_ONLY) { | ||
473 | printk(KERN_ERR "%s: Write on read-only\n", | ||
474 | lo->disk->disk_name); | ||
475 | goto error_out; | ||
476 | } | ||
477 | } | ||
478 | |||
479 | req->errors = 0; | ||
480 | spin_unlock_irq(q->queue_lock); | ||
481 | |||
482 | mutex_lock(&lo->tx_lock); | ||
483 | if (unlikely(!lo->sock)) { | ||
484 | mutex_unlock(&lo->tx_lock); | ||
485 | printk(KERN_ERR "%s: Attempted send on closed socket\n", | ||
486 | lo->disk->disk_name); | ||
487 | req->errors++; | ||
488 | nbd_end_request(req); | ||
489 | spin_lock_irq(q->queue_lock); | ||
490 | continue; | ||
491 | } | ||
492 | 552 | ||
493 | lo->active_req = req; | 553 | wake_up(&lo->waiting_wq); |
494 | |||
495 | if (nbd_send_req(lo, req) != 0) { | ||
496 | printk(KERN_ERR "%s: Request send failed\n", | ||
497 | lo->disk->disk_name); | ||
498 | req->errors++; | ||
499 | nbd_end_request(req); | ||
500 | } else { | ||
501 | spin_lock(&lo->queue_lock); | ||
502 | list_add(&req->queuelist, &lo->queue_head); | ||
503 | spin_unlock(&lo->queue_lock); | ||
504 | } | ||
505 | |||
506 | lo->active_req = NULL; | ||
507 | mutex_unlock(&lo->tx_lock); | ||
508 | wake_up_all(&lo->active_wq); | ||
509 | 554 | ||
510 | spin_lock_irq(q->queue_lock); | 555 | spin_lock_irq(q->queue_lock); |
511 | continue; | ||
512 | |||
513 | error_out: | ||
514 | req->errors++; | ||
515 | spin_unlock(q->queue_lock); | ||
516 | nbd_end_request(req); | ||
517 | spin_lock(q->queue_lock); | ||
518 | } | 556 | } |
519 | } | 557 | } |
520 | 558 | ||
@@ -524,6 +562,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, | |||
524 | struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; | 562 | struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; |
525 | int error; | 563 | int error; |
526 | struct request sreq ; | 564 | struct request sreq ; |
565 | struct task_struct *thread; | ||
527 | 566 | ||
528 | if (!capable(CAP_SYS_ADMIN)) | 567 | if (!capable(CAP_SYS_ADMIN)) |
529 | return -EPERM; | 568 | return -EPERM; |
@@ -606,7 +645,12 @@ static int nbd_ioctl(struct inode *inode, struct file *file, | |||
606 | case NBD_DO_IT: | 645 | case NBD_DO_IT: |
607 | if (!lo->file) | 646 | if (!lo->file) |
608 | return -EINVAL; | 647 | return -EINVAL; |
648 | thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); | ||
649 | if (IS_ERR(thread)) | ||
650 | return PTR_ERR(thread); | ||
651 | wake_up_process(thread); | ||
609 | error = nbd_do_it(lo); | 652 | error = nbd_do_it(lo); |
653 | kthread_stop(thread); | ||
610 | if (error) | 654 | if (error) |
611 | return error; | 655 | return error; |
612 | sock_shutdown(lo, 1); | 656 | sock_shutdown(lo, 1); |
@@ -695,10 +739,12 @@ static int __init nbd_init(void) | |||
695 | nbd_dev[i].file = NULL; | 739 | nbd_dev[i].file = NULL; |
696 | nbd_dev[i].magic = LO_MAGIC; | 740 | nbd_dev[i].magic = LO_MAGIC; |
697 | nbd_dev[i].flags = 0; | 741 | nbd_dev[i].flags = 0; |
742 | INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); | ||
698 | spin_lock_init(&nbd_dev[i].queue_lock); | 743 | spin_lock_init(&nbd_dev[i].queue_lock); |
699 | INIT_LIST_HEAD(&nbd_dev[i].queue_head); | 744 | INIT_LIST_HEAD(&nbd_dev[i].queue_head); |
700 | mutex_init(&nbd_dev[i].tx_lock); | 745 | mutex_init(&nbd_dev[i].tx_lock); |
701 | init_waitqueue_head(&nbd_dev[i].active_wq); | 746 | init_waitqueue_head(&nbd_dev[i].active_wq); |
747 | init_waitqueue_head(&nbd_dev[i].waiting_wq); | ||
702 | nbd_dev[i].blksize = 1024; | 748 | nbd_dev[i].blksize = 1024; |
703 | nbd_dev[i].bytesize = 0; | 749 | nbd_dev[i].bytesize = 0; |
704 | disk->major = NBD_MAJOR; | 750 | disk->major = NBD_MAJOR; |
diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 986572081e19..69075517c511 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h | |||
@@ -56,9 +56,11 @@ struct nbd_device { | |||
56 | int magic; | 56 | int magic; |
57 | 57 | ||
58 | spinlock_t queue_lock; | 58 | spinlock_t queue_lock; |
59 | struct list_head queue_head;/* Requests are added here... */ | 59 | struct list_head queue_head; /* Requests waiting result */ |
60 | struct request *active_req; | 60 | struct request *active_req; |
61 | wait_queue_head_t active_wq; | 61 | wait_queue_head_t active_wq; |
62 | struct list_head waiting_queue; /* Requests to be sent */ | ||
63 | wait_queue_head_t waiting_wq; | ||
62 | 64 | ||
63 | struct mutex tx_lock; | 65 | struct mutex tx_lock; |
64 | struct gendisk *disk; | 66 | struct gendisk *disk; |