diff options
author | Jeff Dike <jdike@addtoit.com> | 2007-05-06 17:51:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-07 15:13:03 -0400 |
commit | a0044bdf60c212366a314da09ca624cb315906e2 (patch) | |
tree | c72990bc90506b8bc64f2e7c445fe7762450d5b2 /arch | |
parent | a61f334fd2864b9b040f7e882726426ed7e8a317 (diff) |
uml: batch I/O requests
Send as many I/O requests to the I/O thread as possible, even though it will
still only handle one at a time. This provides an opportunity to reduce
latency by starting one request before the previous one has been finished in
the driver.
Request handling is somewhat modernized by requesting sg pieces of a request
and handling them separately, finishing off the entire request after all the
pieces are done.
When a request queue stalls, normally because its pipe to the I/O thread is
full, it is put on the restart list. This list is processed by starting up
the queues on it whenever there is some indication that progress might be
possible again. Currently, this happens in the driver interrupt routine:
by then some requests have been finished, so there is likely to be room in
the pipe again.
This almost doubles throughput when copying data between devices, but made no
noticeable difference on anything else I tried.
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/um/drivers/ubd_kern.c | 176 |
1 files changed, 99 insertions, 77 deletions
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 6d163c9e2885..5ef47b73ce99 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c | |||
@@ -149,7 +149,10 @@ struct cow { | |||
149 | int data_offset; | 149 | int data_offset; |
150 | }; | 150 | }; |
151 | 151 | ||
152 | #define MAX_SG 64 | ||
153 | |||
152 | struct ubd { | 154 | struct ubd { |
155 | struct list_head restart; | ||
153 | /* name (and fd, below) of the file opened for writing, either the | 156 | /* name (and fd, below) of the file opened for writing, either the |
154 | * backing or the cow file. */ | 157 | * backing or the cow file. */ |
155 | char *file; | 158 | char *file; |
@@ -164,7 +167,9 @@ struct ubd { | |||
164 | struct platform_device pdev; | 167 | struct platform_device pdev; |
165 | struct request_queue *queue; | 168 | struct request_queue *queue; |
166 | spinlock_t lock; | 169 | spinlock_t lock; |
167 | int active; | 170 | struct scatterlist sg[MAX_SG]; |
171 | struct request *request; | ||
172 | int start_sg, end_sg; | ||
168 | }; | 173 | }; |
169 | 174 | ||
170 | #define DEFAULT_COW { \ | 175 | #define DEFAULT_COW { \ |
@@ -186,7 +191,9 @@ struct ubd { | |||
186 | .shared = 0, \ | 191 | .shared = 0, \ |
187 | .cow = DEFAULT_COW, \ | 192 | .cow = DEFAULT_COW, \ |
188 | .lock = SPIN_LOCK_UNLOCKED, \ | 193 | .lock = SPIN_LOCK_UNLOCKED, \ |
189 | .active = 0, \ | 194 | .request = NULL, \ |
195 | .start_sg = 0, \ | ||
196 | .end_sg = 0, \ | ||
190 | } | 197 | } |
191 | 198 | ||
192 | /* Protected by ubd_lock */ | 199 | /* Protected by ubd_lock */ |
@@ -466,60 +473,67 @@ static void do_ubd_request(request_queue_t * q); | |||
466 | /* Only changed by ubd_init, which is an initcall. */ | 473 | /* Only changed by ubd_init, which is an initcall. */ |
467 | int thread_fd = -1; | 474 | int thread_fd = -1; |
468 | 475 | ||
469 | /* call ubd_finish if you need to serialize */ | 476 | static void ubd_end_request(struct request *req, int bytes, int uptodate) |
470 | static void __ubd_finish(struct request *req, int error) | ||
471 | { | 477 | { |
472 | int nsect; | 478 | if (!end_that_request_first(req, uptodate, bytes >> 9)) { |
473 | 479 | struct ubd *dev = req->rq_disk->private_data; | |
474 | if(error){ | 480 | unsigned long flags; |
475 | end_request(req, 0); | 481 | |
476 | return; | 482 | add_disk_randomness(req->rq_disk); |
483 | spin_lock_irqsave(&dev->lock, flags); | ||
484 | end_that_request_last(req, uptodate); | ||
485 | spin_unlock_irqrestore(&dev->lock, flags); | ||
477 | } | 486 | } |
478 | nsect = req->current_nr_sectors; | ||
479 | req->sector += nsect; | ||
480 | req->buffer += nsect << 9; | ||
481 | req->errors = 0; | ||
482 | req->nr_sectors -= nsect; | ||
483 | req->current_nr_sectors = 0; | ||
484 | end_request(req, 1); | ||
485 | } | 487 | } |
486 | 488 | ||
487 | /* Callable only from interrupt context - otherwise you need to do | 489 | /* Callable only from interrupt context - otherwise you need to do |
488 | * spin_lock_irq()/spin_lock_irqsave() */ | 490 | * spin_lock_irq()/spin_lock_irqsave() */ |
489 | static inline void ubd_finish(struct request *req, int error) | 491 | static inline void ubd_finish(struct request *req, int bytes) |
490 | { | 492 | { |
491 | struct ubd *dev = req->rq_disk->private_data; | 493 | if(bytes < 0){ |
492 | 494 | ubd_end_request(req, 0, 0); | |
493 | spin_lock(&dev->lock); | 495 | return; |
494 | __ubd_finish(req, error); | 496 | } |
495 | spin_unlock(&dev->lock); | 497 | ubd_end_request(req, bytes, 1); |
496 | } | 498 | } |
497 | 499 | ||
500 | static LIST_HEAD(restart); | ||
501 | |||
498 | /* XXX - move this inside ubd_intr. */ | 502 | /* XXX - move this inside ubd_intr. */ |
499 | /* Called without dev->lock held, and only in interrupt context. */ | 503 | /* Called without dev->lock held, and only in interrupt context. */ |
500 | static void ubd_handler(void) | 504 | static void ubd_handler(void) |
501 | { | 505 | { |
502 | struct io_thread_req req; | 506 | struct io_thread_req req; |
503 | struct request *rq; | 507 | struct request *rq; |
504 | struct ubd *dev; | 508 | struct ubd *ubd; |
509 | struct list_head *list, *next_ele; | ||
510 | unsigned long flags; | ||
505 | int n; | 511 | int n; |
506 | 512 | ||
507 | n = os_read_file_k(thread_fd, &req, sizeof(req)); | 513 | while(1){ |
508 | if(n != sizeof(req)){ | 514 | n = os_read_file_k(thread_fd, &req, sizeof(req)); |
509 | printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " | 515 | if(n != sizeof(req)){ |
510 | "err = %d\n", os_getpid(), -n); | 516 | if(n == -EAGAIN) |
511 | return; | 517 | break; |
512 | } | 518 | printk(KERN_ERR "spurious interrupt in ubd_handler, " |
513 | 519 | "err = %d\n", -n); | |
514 | rq = req.req; | 520 | return; |
515 | dev = rq->rq_disk->private_data; | 521 | } |
516 | dev->active = 0; | ||
517 | 522 | ||
518 | ubd_finish(rq, req.error); | 523 | rq = req.req; |
524 | rq->nr_sectors -= req.length >> 9; | ||
525 | if(rq->nr_sectors == 0) | ||
526 | ubd_finish(rq, rq->hard_nr_sectors << 9); | ||
527 | } | ||
519 | reactivate_fd(thread_fd, UBD_IRQ); | 528 | reactivate_fd(thread_fd, UBD_IRQ); |
520 | spin_lock(&dev->lock); | 529 | |
521 | do_ubd_request(dev->queue); | 530 | list_for_each_safe(list, next_ele, &restart){ |
522 | spin_unlock(&dev->lock); | 531 | ubd = container_of(list, struct ubd, restart); |
532 | list_del_init(&ubd->restart); | ||
533 | spin_lock_irqsave(&ubd->lock, flags); | ||
534 | do_ubd_request(ubd->queue); | ||
535 | spin_unlock_irqrestore(&ubd->lock, flags); | ||
536 | } | ||
523 | } | 537 | } |
524 | 538 | ||
525 | static irqreturn_t ubd_intr(int irq, void *dev) | 539 | static irqreturn_t ubd_intr(int irq, void *dev) |
@@ -684,6 +698,8 @@ static int ubd_add(int n, char **error_out) | |||
684 | 698 | ||
685 | ubd_dev->size = ROUND_BLOCK(ubd_dev->size); | 699 | ubd_dev->size = ROUND_BLOCK(ubd_dev->size); |
686 | 700 | ||
701 | INIT_LIST_HEAD(&ubd_dev->restart); | ||
702 | |||
687 | err = -ENOMEM; | 703 | err = -ENOMEM; |
688 | ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); | 704 | ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); |
689 | if (ubd_dev->queue == NULL) { | 705 | if (ubd_dev->queue == NULL) { |
@@ -692,6 +708,7 @@ static int ubd_add(int n, char **error_out) | |||
692 | } | 708 | } |
693 | ubd_dev->queue->queuedata = ubd_dev; | 709 | ubd_dev->queue->queuedata = ubd_dev; |
694 | 710 | ||
711 | blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG); | ||
695 | err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]); | 712 | err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]); |
696 | if(err){ | 713 | if(err){ |
697 | *error_out = "Failed to register device"; | 714 | *error_out = "Failed to register device"; |
@@ -1029,26 +1046,16 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, | |||
1029 | } | 1046 | } |
1030 | 1047 | ||
1031 | /* Called with dev->lock held */ | 1048 | /* Called with dev->lock held */ |
1032 | static int prepare_request(struct request *req, struct io_thread_req *io_req) | 1049 | static void prepare_request(struct request *req, struct io_thread_req *io_req, |
1050 | unsigned long long offset, int page_offset, | ||
1051 | int len, struct page *page) | ||
1033 | { | 1052 | { |
1034 | struct gendisk *disk = req->rq_disk; | 1053 | struct gendisk *disk = req->rq_disk; |
1035 | struct ubd *ubd_dev = disk->private_data; | 1054 | struct ubd *ubd_dev = disk->private_data; |
1036 | __u64 offset; | ||
1037 | int len; | ||
1038 | |||
1039 | /* This should be impossible now */ | ||
1040 | if((rq_data_dir(req) == WRITE) && !ubd_dev->openflags.w){ | ||
1041 | printk("Write attempted on readonly ubd device %s\n", | ||
1042 | disk->disk_name); | ||
1043 | end_request(req, 0); | ||
1044 | return(1); | ||
1045 | } | ||
1046 | |||
1047 | offset = ((__u64) req->sector) << 9; | ||
1048 | len = req->current_nr_sectors << 9; | ||
1049 | 1055 | ||
1050 | io_req->req = req; | 1056 | io_req->req = req; |
1051 | io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : ubd_dev->fd; | 1057 | io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : |
1058 | ubd_dev->fd; | ||
1052 | io_req->fds[1] = ubd_dev->fd; | 1059 | io_req->fds[1] = ubd_dev->fd; |
1053 | io_req->cow_offset = -1; | 1060 | io_req->cow_offset = -1; |
1054 | io_req->offset = offset; | 1061 | io_req->offset = offset; |
@@ -1059,14 +1066,13 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) | |||
1059 | io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; | 1066 | io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; |
1060 | io_req->offsets[0] = 0; | 1067 | io_req->offsets[0] = 0; |
1061 | io_req->offsets[1] = ubd_dev->cow.data_offset; | 1068 | io_req->offsets[1] = ubd_dev->cow.data_offset; |
1062 | io_req->buffer = req->buffer; | 1069 | io_req->buffer = page_address(page) + page_offset; |
1063 | io_req->sectorsize = 1 << 9; | 1070 | io_req->sectorsize = 1 << 9; |
1064 | 1071 | ||
1065 | if(ubd_dev->cow.file != NULL) | 1072 | if(ubd_dev->cow.file != NULL) |
1066 | cowify_req(io_req, ubd_dev->cow.bitmap, ubd_dev->cow.bitmap_offset, | 1073 | cowify_req(io_req, ubd_dev->cow.bitmap, |
1067 | ubd_dev->cow.bitmap_len); | 1074 | ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); |
1068 | 1075 | ||
1069 | return(0); | ||
1070 | } | 1076 | } |
1071 | 1077 | ||
1072 | /* Called with dev->lock held */ | 1078 | /* Called with dev->lock held */ |
@@ -1074,29 +1080,45 @@ static void do_ubd_request(request_queue_t *q) | |||
1074 | { | 1080 | { |
1075 | struct io_thread_req io_req; | 1081 | struct io_thread_req io_req; |
1076 | struct request *req; | 1082 | struct request *req; |
1077 | int err, n; | 1083 | int n; |
1078 | 1084 | ||
1079 | if(thread_fd == -1){ | 1085 | while(1){ |
1080 | while((req = elv_next_request(q)) != NULL){ | ||
1081 | err = prepare_request(req, &io_req); | ||
1082 | if(!err){ | ||
1083 | do_io(&io_req); | ||
1084 | __ubd_finish(req, io_req.error); | ||
1085 | } | ||
1086 | } | ||
1087 | } | ||
1088 | else { | ||
1089 | struct ubd *dev = q->queuedata; | 1086 | struct ubd *dev = q->queuedata; |
1090 | if(dev->active || (req = elv_next_request(q)) == NULL) | 1087 | if(dev->end_sg == 0){ |
1091 | return; | 1088 | struct request *req = elv_next_request(q); |
1092 | err = prepare_request(req, &io_req); | 1089 | if(req == NULL) |
1093 | if(!err){ | 1090 | return; |
1094 | dev->active = 1; | 1091 | |
1095 | n = os_write_file_k(thread_fd, &io_req, sizeof(io_req)); | 1092 | dev->request = req; |
1096 | if(n != sizeof(io_req)) | 1093 | blkdev_dequeue_request(req); |
1097 | printk("write to io thread failed, " | 1094 | dev->start_sg = 0; |
1098 | "errno = %d\n", -n); | 1095 | dev->end_sg = blk_rq_map_sg(q, req, dev->sg); |
1096 | } | ||
1097 | |||
1098 | req = dev->request; | ||
1099 | while(dev->start_sg < dev->end_sg){ | ||
1100 | struct scatterlist *sg = &dev->sg[dev->start_sg]; | ||
1101 | |||
1102 | prepare_request(req, &io_req, | ||
1103 | (unsigned long long) req->sector << 9, | ||
1104 | sg->offset, sg->length, sg->page); | ||
1105 | |||
1106 | n = os_write_file_k(thread_fd, (char *) &io_req, | ||
1107 | sizeof(io_req)); | ||
1108 | if(n != sizeof(io_req)){ | ||
1109 | if(n != -EAGAIN) | ||
1110 | printk("write to io thread failed, " | ||
1111 | "errno = %d\n", -n); | ||
1112 | else if(list_empty(&dev->restart)) | ||
1113 | list_add(&dev->restart, &restart); | ||
1114 | return; | ||
1115 | } | ||
1116 | |||
1117 | req->sector += sg->length >> 9; | ||
1118 | dev->start_sg++; | ||
1099 | } | 1119 | } |
1120 | dev->end_sg = 0; | ||
1121 | dev->request = NULL; | ||
1100 | } | 1122 | } |
1101 | } | 1123 | } |
1102 | 1124 | ||