path: root/arch/um
author	Jeff Dike <jdike@addtoit.com>	2005-09-03 18:57:46 -0400
committer	Linus Torvalds <torvalds@evo.osdl.org>	2005-09-05 03:06:23 -0400
commit	09ace81c1d737bcbb2423db235ac980cac4d5de9 (patch)
tree	d31987b15d57429bd40843c02b2283b8e2d90cb0 /arch/um
parent	75e5584c89d213d6089f64f22cd899fb172e4c95 (diff)
[PATCH] uml: add host AIO support to block driver
This adds AIO support to the ubd driver. The driver breaks each struct request into I/O requests to the host, based on the hardware segments in the request and on any COW blocks covered by the request.

The ubd I/O thread is gone, since there is now an equivalent thread in the AIO module. There is now provision for multiple outstanding requests: a request isn't retired until all of its pieces have completed. The AIO requests have a shared count, which is decremented as I/O operations come in until it reaches zero. This could possibly be moved into the request struct - I haven't looked at that yet.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
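For illustration only, here is a minimal sketch of the completion scheme described above - one request split into several host I/O pieces and retired when a shared outstanding-piece count reaches zero. The names (toy_request, complete_piece) are hypothetical and do not appear in the patch; the driver itself keys retirement off req->nr_sectors and an atomic count in its bitmap_io structure.

/* Illustrative sketch only - not code from the patch. It models a request
 * split into several host I/O pieces, retired when the shared count of
 * outstanding pieces drops to zero. */
#include <stdatomic.h>
#include <stdio.h>

struct toy_request {
	atomic_int pending;	/* pieces still outstanding */
	int error;		/* sticky error from any piece */
};

/* Called once per completed host I/O piece; the last caller retires the
 * request, mirroring the "shared count decremented until it reaches 0"
 * description in the commit message. */
static void complete_piece(struct toy_request *req, int err)
{
	if (err)
		req->error = err;
	if (atomic_fetch_sub(&req->pending, 1) == 1)
		printf("request retired, error = %d\n", req->error);
}

int main(void)
{
	struct toy_request req = { .error = 0 };

	atomic_init(&req.pending, 3);	/* request split into three pieces */
	complete_piece(&req, 0);
	complete_piece(&req, 0);
	complete_piece(&req, 0);	/* last completion retires the request */
	return 0;
}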
Diffstat (limited to 'arch/um')
-rw-r--r--	arch/um/drivers/Makefile	2
-rw-r--r--	arch/um/drivers/ubd_kern.c	556
-rw-r--r--	arch/um/include/aio.h	18
-rw-r--r--	arch/um/os-Linux/aio.c	206
4 files changed, 373 insertions, 409 deletions
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index de17d4c6e02d..783e18cae090 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o
-ubd-objs := ubd_kern.o ubd_user.o
+ubd-objs := ubd_kern.o
 port-objs := port_kern.o port_user.o
 harddog-objs := harddog_kern.o harddog_user.o
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f73134333f64..e77a38da4350 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -35,6 +35,7 @@
 #include "linux/blkpg.h"
 #include "linux/genhd.h"
 #include "linux/spinlock.h"
+#include "asm/atomic.h"
 #include "asm/segment.h"
 #include "asm/uaccess.h"
 #include "asm/irq.h"
@@ -53,20 +54,21 @@
 #include "mem.h"
 #include "mem_kern.h"
 #include "cow.h"
+#include "aio.h"
 
 enum ubd_req { UBD_READ, UBD_WRITE };
 
 struct io_thread_req {
-	enum ubd_req op;
+	enum aio_type op;
 	int fds[2];
 	unsigned long offsets[2];
 	unsigned long long offset;
 	unsigned long length;
 	char *buffer;
 	int sectorsize;
-	unsigned long sector_mask;
-	unsigned long long cow_offset;
-	unsigned long bitmap_words[2];
+	int bitmap_offset;
+	long bitmap_start;
+	long bitmap_end;
 	int error;
 };
 
@@ -80,28 +82,31 @@ extern int create_cow_file(char *cow_file, char *backing_file,
 			  unsigned long *bitmap_len_out,
 			  int *data_offset_out);
 extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
-extern void do_io(struct io_thread_req *req);
+extern void do_io(struct io_thread_req *req, struct request *r,
+		  unsigned long *bitmap);
 
-static inline int ubd_test_bit(__u64 bit, unsigned char *data)
+static inline int ubd_test_bit(__u64 bit, void *data)
 {
+	unsigned char *buffer = data;
 	__u64 n;
 	int bits, off;
 
-	bits = sizeof(data[0]) * 8;
+	bits = sizeof(buffer[0]) * 8;
 	n = bit / bits;
 	off = bit % bits;
-	return((data[n] & (1 << off)) != 0);
+	return((buffer[n] & (1 << off)) != 0);
 }
 
-static inline void ubd_set_bit(__u64 bit, unsigned char *data)
+static inline void ubd_set_bit(__u64 bit, void *data)
 {
+	unsigned char *buffer = data;
 	__u64 n;
 	int bits, off;
 
-	bits = sizeof(data[0]) * 8;
+	bits = sizeof(buffer[0]) * 8;
 	n = bit / bits;
 	off = bit % bits;
-	data[n] |= (1 << off);
+	buffer[n] |= (1 << off);
 }
 /*End stuff from ubd_user.h*/
 
@@ -110,8 +115,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
 static DEFINE_SPINLOCK(ubd_io_lock);
 static DEFINE_SPINLOCK(ubd_lock);
 
-static void (*do_ubd)(void);
-
 static int ubd_open(struct inode * inode, struct file * filp);
 static int ubd_release(struct inode * inode, struct file * file);
 static int ubd_ioctl(struct inode * inode, struct file * file,
@@ -158,6 +161,8 @@ struct cow {
 	int data_offset;
 };
 
+#define MAX_SG 64
+
 struct ubd {
 	char *file;
 	int count;
@@ -168,6 +173,7 @@ struct ubd {
 	int no_cow;
 	struct cow cow;
 	struct platform_device pdev;
+	struct scatterlist sg[MAX_SG];
 };
 
 #define DEFAULT_COW { \
@@ -460,80 +466,113 @@ __uml_help(fakehd,
 );
 
 static void do_ubd_request(request_queue_t * q);
-
-/* Only changed by ubd_init, which is an initcall. */
-int thread_fd = -1;
+static int in_ubd;
 
 /* Changed by ubd_handler, which is serialized because interrupts only
  * happen on CPU 0.
  */
 int intr_count = 0;
 
-/* call ubd_finish if you need to serialize */
-static void __ubd_finish(struct request *req, int error)
+static void ubd_end_request(struct request *req, int bytes, int uptodate)
 {
-	int nsect;
-
-	if(error){
-		end_request(req, 0);
-		return;
+	if (!end_that_request_first(req, uptodate, bytes >> 9)) {
+		add_disk_randomness(req->rq_disk);
+		end_that_request_last(req);
 	}
-	nsect = req->current_nr_sectors;
-	req->sector += nsect;
-	req->buffer += nsect << 9;
-	req->errors = 0;
-	req->nr_sectors -= nsect;
-	req->current_nr_sectors = 0;
-	end_request(req, 1);
 }
 
-static inline void ubd_finish(struct request *req, int error)
+/* call ubd_finish if you need to serialize */
+static void __ubd_finish(struct request *req, int bytes)
 {
-	spin_lock(&ubd_io_lock);
-	__ubd_finish(req, error);
-	spin_unlock(&ubd_io_lock);
+	if(bytes < 0){
+		ubd_end_request(req, 0, 0);
+		return;
+	}
+
+	ubd_end_request(req, bytes, 1);
 }
 
-/* Called without ubd_io_lock held */
-static void ubd_handler(void)
+static inline void ubd_finish(struct request *req, int bytes)
 {
-	struct io_thread_req req;
-	struct request *rq = elv_next_request(ubd_queue);
-	int n;
-
-	do_ubd = NULL;
-	intr_count++;
-	n = os_read_file(thread_fd, &req, sizeof(req));
-	if(n != sizeof(req)){
-		printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
-		       "err = %d\n", os_getpid(), -n);
-		spin_lock(&ubd_io_lock);
-		end_request(rq, 0);
-		spin_unlock(&ubd_io_lock);
-		return;
-	}
-
-	ubd_finish(rq, req.error);
-	reactivate_fd(thread_fd, UBD_IRQ);
-	do_ubd_request(ubd_queue);
+	spin_lock(&ubd_io_lock);
+	__ubd_finish(req, bytes);
+	spin_unlock(&ubd_io_lock);
 }
 
+struct bitmap_io {
+	atomic_t count;
+	struct aio_context aio;
+};
+
+struct ubd_aio {
+	struct aio_context aio;
+	struct request *req;
+	int len;
+	struct bitmap_io *bitmap;
+	void *bitmap_buf;
+};
+
+static int ubd_reply_fd = -1;
+
 static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
 {
-	ubd_handler();
-	return(IRQ_HANDLED);
-}
+	struct aio_thread_reply reply;
+	struct ubd_aio *aio;
+	struct request *req;
+	int err, n, fd = (int) (long) dev;
 
-/* Only changed by ubd_init, which is an initcall. */
-static int io_pid = -1;
+	while(1){
+		err = os_read_file(fd, &reply, sizeof(reply));
+		if(err == -EAGAIN)
+			break;
+		if(err < 0){
+			printk("ubd_aio_handler - read returned err %d\n",
+			       -err);
+			break;
+		}
 
-void kill_io_thread(void)
-{
-	if(io_pid != -1)
-		os_kill_process(io_pid, 1);
-}
+		aio = container_of(reply.data, struct ubd_aio, aio);
+		n = reply.err;
+
+		if(n == 0){
+			req = aio->req;
+			req->nr_sectors -= aio->len >> 9;
 
-__uml_exitcall(kill_io_thread);
+			if((aio->bitmap != NULL) &&
+			   (atomic_dec_and_test(&aio->bitmap->count))){
+				aio->aio = aio->bitmap->aio;
+				aio->len = 0;
+				kfree(aio->bitmap);
+				aio->bitmap = NULL;
+				submit_aio(&aio->aio);
+			}
+			else {
+				if((req->nr_sectors == 0) &&
+				   (aio->bitmap == NULL)){
+					int len = req->hard_nr_sectors << 9;
+					ubd_finish(req, len);
+				}
+
+				if(aio->bitmap_buf != NULL)
+					kfree(aio->bitmap_buf);
+				kfree(aio);
+			}
+		}
+		else if(n < 0){
+			ubd_finish(aio->req, n);
+			if(aio->bitmap != NULL)
+				kfree(aio->bitmap);
+			if(aio->bitmap_buf != NULL)
+				kfree(aio->bitmap_buf);
+			kfree(aio);
+		}
+	}
+	reactivate_fd(fd, UBD_IRQ);
+
+	do_ubd_request(ubd_queue);
+
+	return(IRQ_HANDLED);
+}
 
 static int ubd_file_size(struct ubd *dev, __u64 *size_out)
 {
@@ -569,7 +608,7 @@ static int ubd_open_dev(struct ubd *dev)
 			   &dev->cow.data_offset, create_ptr);
 
 	if((dev->fd == -ENOENT) && create_cow){
-		dev->fd = create_cow_file(dev->file, dev->cow.file, 
+		dev->fd = create_cow_file(dev->file, dev->cow.file,
 					  dev->openflags, 1 << 9, PAGE_SIZE,
 					  &dev->cow.bitmap_offset,
 					  &dev->cow.bitmap_len,
@@ -831,6 +870,10 @@ int ubd_init(void)
 {
 	int i;
 
+	ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr);
+	if(ubd_reply_fd < 0)
+		printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd);
+
 	devfs_mk_dir("ubd");
 	if (register_blkdev(MAJOR_NR, "ubd"))
 		return -1;
@@ -841,6 +884,7 @@ int ubd_init(void)
 		return -1;
 	}
 
+	blk_queue_max_hw_segments(ubd_queue, MAX_SG);
 	if (fake_major != MAJOR_NR) {
 		char name[sizeof("ubd_nnn\0")];
 
@@ -852,40 +896,12 @@ int ubd_init(void)
 	driver_register(&ubd_driver);
 	for (i = 0; i < MAX_DEV; i++)
 		ubd_add(i);
+
 	return 0;
 }
 
 late_initcall(ubd_init);
 
-int ubd_driver_init(void){
-	unsigned long stack;
-	int err;
-
-	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
-	if(global_openflags.s){
-		printk(KERN_INFO "ubd: Synchronous mode\n");
-		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
-		 * enough. So use anyway the io thread. */
-	}
-	stack = alloc_stack(0, 0);
-	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
-				 &thread_fd);
-	if(io_pid < 0){
-		printk(KERN_ERR
-		       "ubd : Failed to start I/O thread (errno = %d) - "
-		       "falling back to synchronous I/O\n", -io_pid);
-		io_pid = -1;
-		return(0);
-	}
-	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
-			     SA_INTERRUPT, "ubd", ubd_dev);
-	if(err != 0)
-		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
-	return(err);
-}
-
-device_initcall(ubd_driver_init);
-
 static int ubd_open(struct inode *inode, struct file *filp)
 {
 	struct gendisk *disk = inode->i_bdev->bd_disk;
@@ -923,105 +939,55 @@ static int ubd_release(struct inode * inode, struct file * file)
 	return(0);
 }
 
-static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
-			  __u64 *cow_offset, unsigned long *bitmap,
-			  __u64 bitmap_offset, unsigned long *bitmap_words,
-			  __u64 bitmap_len)
+static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap)
 {
-	__u64 sector = io_offset >> 9;
-	int i, update_bitmap = 0;
-
-	for(i = 0; i < length >> 9; i++){
-		if(cow_mask != NULL)
-			ubd_set_bit(i, (unsigned char *) cow_mask);
-		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
-			continue;
-
-		update_bitmap = 1;
-		ubd_set_bit(sector + i, (unsigned char *) bitmap);
-	}
-
-	if(!update_bitmap)
-		return;
-
-	*cow_offset = sector / (sizeof(unsigned long) * 8);
-
-	/* This takes care of the case where we're exactly at the end of the
-	 * device, and *cow_offset + 1 is off the end.  So, just back it up
-	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
-	 * for the original diagnosis.
-	 */
-	if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
-			   sizeof(unsigned long) - 1))
-		(*cow_offset)--;
-
-	bitmap_words[0] = bitmap[*cow_offset];
-	bitmap_words[1] = bitmap[*cow_offset + 1];
-
-	*cow_offset *= sizeof(unsigned long);
-	*cow_offset += bitmap_offset;
-}
+	__u64 sector = req->offset / req->sectorsize;
+	int i;
 
-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
-		       __u64 bitmap_offset, __u64 bitmap_len)
-{
-	__u64 sector = req->offset >> 9;
-	int i;
+	for(i = 0; i < req->length / req->sectorsize; i++){
+		if(ubd_test_bit(sector + i, bitmap))
+			continue;
 
-	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
-		panic("Operation too long");
+		if(req->bitmap_start == -1)
+			req->bitmap_start = sector + i;
+		req->bitmap_end = sector + i + 1;
 
-	if(req->op == UBD_READ) {
-		for(i = 0; i < req->length >> 9; i++){
-			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
-				ubd_set_bit(i, (unsigned char *)
-					    &req->sector_mask);
-		}
-	}
-	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
-			   &req->cow_offset, bitmap, bitmap_offset,
-			   req->bitmap_words, bitmap_len);
+		ubd_set_bit(sector + i, bitmap);
+	}
 }
 
 /* Called with ubd_io_lock held */
-static int prepare_request(struct request *req, struct io_thread_req *io_req)
+static int prepare_request(struct request *req, struct io_thread_req *io_req,
+			   unsigned long long offset, int page_offset,
+			   int len, struct page *page)
 {
 	struct gendisk *disk = req->rq_disk;
 	struct ubd *dev = disk->private_data;
-	__u64 offset;
-	int len;
-
-	if(req->rq_status == RQ_INACTIVE) return(1);
 
 	/* This should be impossible now */
 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
 		printk("Write attempted on readonly ubd device %s\n",
 		       disk->disk_name);
-		end_request(req, 0);
+		ubd_end_request(req, 0, 0);
 		return(1);
 	}
 
-	offset = ((__u64) req->sector) << 9;
-	len = req->current_nr_sectors << 9;
-
 	io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
 	io_req->fds[1] = dev->fd;
-	io_req->cow_offset = -1;
 	io_req->offset = offset;
 	io_req->length = len;
 	io_req->error = 0;
-	io_req->sector_mask = 0;
-
-	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
+	io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE;
 	io_req->offsets[0] = 0;
 	io_req->offsets[1] = dev->cow.data_offset;
-	io_req->buffer = req->buffer;
+	io_req->buffer = page_address(page) + page_offset;
 	io_req->sectorsize = 1 << 9;
+	io_req->bitmap_offset = dev->cow.bitmap_offset;
+	io_req->bitmap_start = -1;
+	io_req->bitmap_end = -1;
 
-	if(dev->cow.file != NULL)
-		cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
-			   dev->cow.bitmap_len);
-
+	if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE))
+		cowify_bitmap(io_req, dev->cow.bitmap);
 	return(0);
 }
 
@@ -1030,30 +996,36 @@ static void do_ubd_request(request_queue_t *q)
 {
 	struct io_thread_req io_req;
 	struct request *req;
-	int err, n;
-
-	if(thread_fd == -1){
-		while((req = elv_next_request(q)) != NULL){
-			err = prepare_request(req, &io_req);
-			if(!err){
-				do_io(&io_req);
-				__ubd_finish(req, io_req.error);
-			}
-		}
-	}
-	else {
-		if(do_ubd || (req = elv_next_request(q)) == NULL)
-			return;
-		err = prepare_request(req, &io_req);
-		if(!err){
-			do_ubd = ubd_handler;
-			n = os_write_file(thread_fd, (char *) &io_req,
-					  sizeof(io_req));
-			if(n != sizeof(io_req))
-				printk("write to io thread failed, "
-				       "errno = %d\n", -n);
+	__u64 sector;
+	int err;
+
+	if(in_ubd)
+		return;
+	in_ubd = 1;
+	while((req = elv_next_request(q)) != NULL){
+		struct gendisk *disk = req->rq_disk;
+		struct ubd *dev = disk->private_data;
+		int n, i;
+
+		blkdev_dequeue_request(req);
+
+		sector = req->sector;
+		n = blk_rq_map_sg(q, req, dev->sg);
+
+		for(i = 0; i < n; i++){
+			struct scatterlist *sg = &dev->sg[i];
+
+			err = prepare_request(req, &io_req, sector << 9,
+					      sg->offset, sg->length,
+					      sg->page);
+			if(err)
+				continue;
+
+			sector += sg->length >> 9;
+			do_io(&io_req, req, dev->cow.bitmap);
 		}
 	}
+	in_ubd = 0;
 }
 
 static int ubd_ioctl(struct inode * inode, struct file * file,
@@ -1269,131 +1241,95 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
 	return(err);
 }
 
-static int update_bitmap(struct io_thread_req *req)
+void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap)
 {
-	int n;
-
-	if(req->cow_offset == -1)
-		return(0);
-
-	n = os_seek_file(req->fds[1], req->cow_offset);
-	if(n < 0){
-		printk("do_io - bitmap lseek failed : err = %d\n", -n);
-		return(1);
-	}
-
-	n = os_write_file(req->fds[1], &req->bitmap_words,
-			  sizeof(req->bitmap_words));
-	if(n != sizeof(req->bitmap_words)){
-		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
-		       req->fds[1]);
-		return(1);
-	}
-
-	return(0);
-}
-
-void do_io(struct io_thread_req *req)
-{
-	char *buf;
-	unsigned long len;
-	int n, nsectors, start, end, bit;
-	int err;
-	__u64 off;
-
-	nsectors = req->length / req->sectorsize;
-	start = 0;
-	do {
-		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
-		end = start;
-		while((end < nsectors) &&
-		      (ubd_test_bit(end, (unsigned char *)
-				    &req->sector_mask) == bit))
-			end++;
-
-		off = req->offset + req->offsets[bit] +
-			start * req->sectorsize;
-		len = (end - start) * req->sectorsize;
-		buf = &req->buffer[start * req->sectorsize];
-
-		err = os_seek_file(req->fds[bit], off);
-		if(err < 0){
-			printk("do_io - lseek failed : err = %d\n", -err);
-			req->error = 1;
-			return;
-		}
-		if(req->op == UBD_READ){
-			n = 0;
-			do {
-				buf = &buf[n];
-				len -= n;
-				n = os_read_file(req->fds[bit], buf, len);
-				if (n < 0) {
-					printk("do_io - read failed, err = %d "
-					       "fd = %d\n", -n, req->fds[bit]);
-					req->error = 1;
-					return;
-				}
-			} while((n < len) && (n != 0));
-			if (n < len) memset(&buf[n], 0, len - n);
-		} else {
-			n = os_write_file(req->fds[bit], buf, len);
-			if(n != len){
-				printk("do_io - write failed err = %d "
-				       "fd = %d\n", -n, req->fds[bit]);
-				req->error = 1;
-				return;
-			}
-		}
+	struct ubd_aio *aio;
+	struct bitmap_io *bitmap_io = NULL;
+	char *buf;
+	void *bitmap_buf = NULL;
+	unsigned long len, sector;
+	int nsectors, start, end, bit, err;
+	__u64 off;
+
+	if(req->bitmap_start != -1){
+		/* Round up to the nearest word */
+		int round = sizeof(unsigned long);
+		len = (req->bitmap_end - req->bitmap_start +
+		       round * 8 - 1) / (round * 8);
+		len *= round;
+
+		off = req->bitmap_start / (8 * round);
+		off *= round;
+
+		bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL);
+		if(bitmap_io == NULL){
+			printk("Failed to kmalloc bitmap IO\n");
+			req->error = 1;
+			return;
+		}
 
-		start = end;
-	} while(start < nsectors);
+		bitmap_buf = kmalloc(len, GFP_KERNEL);
+		if(bitmap_buf == NULL){
+			printk("do_io : kmalloc of bitmap chunk "
+			       "failed\n");
+			kfree(bitmap_io);
+			req->error = 1;
+			return;
+		}
+		memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len);
+
+		*bitmap_io = ((struct bitmap_io)
+			      { .count	= ATOMIC_INIT(0),
+				.aio	= INIT_AIO(AIO_WRITE, req->fds[1],
+						   bitmap_buf, len,
+						   req->bitmap_offset + off,
+						   ubd_reply_fd) } );
+	}
 
-	req->error = update_bitmap(req);
-}
+	nsectors = req->length / req->sectorsize;
+	start = 0;
+	end = nsectors;
+	bit = 0;
+	do {
+		if(bitmap != NULL){
+			sector = req->offset / req->sectorsize;
+			bit = ubd_test_bit(sector + start, bitmap);
+			end = start;
+			while((end < nsectors) &&
+			      (ubd_test_bit(sector + end, bitmap) == bit))
+				end++;
+		}
 
-/* Changed in start_io_thread, which is serialized by being called only
- * from ubd_init, which is an initcall.
- */
-int kernel_fd = -1;
+		off = req->offsets[bit] + req->offset +
+			start * req->sectorsize;
+		len = (end - start) * req->sectorsize;
+		buf = &req->buffer[start * req->sectorsize];
 
-/* Only changed by the io thread */
-int io_count = 0;
+		aio = kmalloc(sizeof(*aio), GFP_KERNEL);
+		if(aio == NULL){
+			req->error = 1;
+			return;
+		}
 
-int io_thread(void *arg)
-{
-	struct io_thread_req req;
-	int n;
+		*aio = ((struct ubd_aio)
+			{ .aio		= INIT_AIO(req->op, req->fds[bit], buf,
+						   len, off, ubd_reply_fd),
+			  .len		= len,
+			  .req		= r,
+			  .bitmap	= bitmap_io,
+			  .bitmap_buf	= bitmap_buf });
+
+		if(aio->bitmap != NULL)
+			atomic_inc(&aio->bitmap->count);
+
+		err = submit_aio(&aio->aio);
+		if(err){
+			printk("do_io - submit_aio failed, "
+			       "err = %d\n", err);
+			req->error = 1;
+			return;
+		}
 
-	ignore_sigwinch_sig();
-	while(1){
-		n = os_read_file(kernel_fd, &req, sizeof(req));
-		if(n != sizeof(req)){
-			if(n < 0)
-				printk("io_thread - read failed, fd = %d, "
-				       "err = %d\n", kernel_fd, -n);
-			else {
-				printk("io_thread - short read, fd = %d, "
-				       "length = %d\n", kernel_fd, n);
-			}
-			continue;
-		}
-		io_count++;
-		do_io(&req);
-		n = os_write_file(kernel_fd, &req, sizeof(req));
-		if(n != sizeof(req))
-			printk("io_thread - write failed, fd = %d, err = %d\n",
-			       kernel_fd, -n);
-	}
+		start = end;
+	} while(start < nsectors);
 }
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h
index 423bae9153f8..83f16877ab08 100644
--- a/arch/um/include/aio.h
+++ b/arch/um/include/aio.h
@@ -14,15 +14,27 @@ struct aio_thread_reply {
 };
 
 struct aio_context {
+	enum aio_type type;
+	int fd;
+	void *data;
+	int len;
+	unsigned long long offset;
 	int reply_fd;
 	struct aio_context *next;
 };
 
+#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \
+		 aio_reply_fd) \
+	{ .type		= aio_type, \
+	  .fd		= aio_fd, \
+	  .data		= aio_data, \
+	  .len		= aio_len, \
+	  .offset	= aio_offset, \
+	  .reply_fd	= aio_reply_fd }
+
 #define INIT_AIO_CONTEXT { .reply_fd	= -1, \
 			   .next	= NULL }
 
-extern int submit_aio(enum aio_type type, int fd, char *buf, int len,
-		      unsigned long long offset, int reply_fd,
-		      struct aio_context *aio);
+extern int submit_aio(struct aio_context *aio);
 
 #endif
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index f2ca2992bbd6..b04897cd995d 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <signal.h>
+#include <string.h>
 #include <errno.h>
 #include <sched.h>
 #include <sys/syscall.h>
@@ -16,18 +17,31 @@
 #include "user.h"
 #include "mode.h"
 
-struct aio_thread_req {
-	enum aio_type type;
-	int io_fd;
-	unsigned long long offset;
-	char *buf;
-	int len;
-	struct aio_context *aio;
-};
-
 static int aio_req_fd_r = -1;
 static int aio_req_fd_w = -1;
 
+static int update_aio(struct aio_context *aio, int res)
+{
+	if(res < 0)
+		aio->len = res;
+	else if((res == 0) && (aio->type == AIO_READ)){
+		/* This is the EOF case - we have hit the end of the file
+		 * and it ends in a partial block, so we fill the end of
+		 * the block with zeros and claim success.
+		 */
+		memset(aio->data, 0, aio->len);
+		aio->len = 0;
+	}
+	else if(res > 0){
+		aio->len -= res;
+		aio->data += res;
+		aio->offset += res;
+		return aio->len;
+	}
+
+	return 0;
+}
+
 #if defined(HAVE_AIO_ABI)
 #include <linux/aio_abi.h>
 
@@ -66,8 +80,7 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
  * that it now backs the mmapped area.
  */
 
-static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
-		  int len, unsigned long long offset, struct aio_context *aio)
+static int do_aio(aio_context_t ctx, struct aio_context *aio)
 {
 	struct iocb iocb, *iocbp = &iocb;
 	char c;
@@ -75,37 +88,37 @@ static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
 
 	iocb = ((struct iocb) { .aio_data	= (unsigned long) aio,
 				.aio_reqprio	= 0,
-				.aio_fildes	= fd,
-				.aio_buf	= (unsigned long) buf,
-				.aio_nbytes	= len,
-				.aio_offset	= offset,
+				.aio_fildes	= aio->fd,
+				.aio_buf	= (unsigned long) aio->data,
+				.aio_nbytes	= aio->len,
+				.aio_offset	= aio->offset,
 				.aio_reserved1	= 0,
 				.aio_reserved2	= 0,
 				.aio_reserved3	= 0 });
 
-	switch(type){
+	switch(aio->type){
 	case AIO_READ:
 		iocb.aio_lio_opcode = IOCB_CMD_PREAD;
-		err = io_submit(ctx, 1, &iocbp);
 		break;
 	case AIO_WRITE:
 		iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
-		err = io_submit(ctx, 1, &iocbp);
 		break;
 	case AIO_MMAP:
 		iocb.aio_lio_opcode = IOCB_CMD_PREAD;
 		iocb.aio_buf = (unsigned long) &c;
 		iocb.aio_nbytes = sizeof(c);
-		err = io_submit(ctx, 1, &iocbp);
 		break;
 	default:
-		printk("Bogus op in do_aio - %d\n", type);
+		printk("Bogus op in do_aio - %d\n", aio->type);
 		err = -EINVAL;
-		break;
+		goto out;
 	}
+
+	err = io_submit(ctx, 1, &iocbp);
 	if(err > 0)
 		err = 0;
 
+ out:
 	return err;
 }
 
@@ -114,8 +127,9 @@ static aio_context_t ctx = 0;
 static int aio_thread(void *arg)
 {
 	struct aio_thread_reply reply;
+	struct aio_context *aio;
 	struct io_event event;
-	int err, n, reply_fd;
+	int err, n;
 
 	signal(SIGWINCH, SIG_IGN);
 
@@ -128,15 +142,21 @@ static int aio_thread(void *arg)
 			       "errno = %d\n", errno);
 		}
 		else {
+			aio = (struct aio_context *) event.data;
+			if(update_aio(aio, event.res)){
+				do_aio(ctx, aio);
+				continue;
+			}
+
 			reply = ((struct aio_thread_reply)
-				{ .data = (void *) (long) event.data,
-				  .err	= event.res });
-			reply_fd = ((struct aio_context *) reply.data)->reply_fd;
-			err = os_write_file(reply_fd, &reply, sizeof(reply));
+				{ .data = aio,
+				  .err	= aio->len });
+			err = os_write_file(aio->reply_fd, &reply,
+					    sizeof(reply));
 			if(err != sizeof(reply))
-				printk("not_aio_thread - write failed, "
-				       "fd = %d, err = %d\n",
-				       aio_req_fd_r, -err);
+				printk("aio_thread - write failed, "
+				       "fd = %d, err = %d\n", aio->reply_fd,
+				       -err);
 		}
 	}
 	return 0;
@@ -144,35 +164,35 @@ static int aio_thread(void *arg)
 
 #endif
 
-static int do_not_aio(struct aio_thread_req *req)
+static int do_not_aio(struct aio_context *aio)
 {
 	char c;
 	int err;
 
-	switch(req->type){
+	switch(aio->type){
 	case AIO_READ:
-		err = os_seek_file(req->io_fd, req->offset);
+		err = os_seek_file(aio->fd, aio->offset);
 		if(err)
 			goto out;
 
-		err = os_read_file(req->io_fd, req->buf, req->len);
+		err = os_read_file(aio->fd, aio->data, aio->len);
 		break;
 	case AIO_WRITE:
-		err = os_seek_file(req->io_fd, req->offset);
+		err = os_seek_file(aio->fd, aio->offset);
 		if(err)
 			goto out;
 
-		err = os_write_file(req->io_fd, req->buf, req->len);
+		err = os_write_file(aio->fd, aio->data, aio->len);
 		break;
 	case AIO_MMAP:
-		err = os_seek_file(req->io_fd, req->offset);
+		err = os_seek_file(aio->fd, aio->offset);
 		if(err)
 			goto out;
 
-		err = os_read_file(req->io_fd, &c, sizeof(c));
+		err = os_read_file(aio->fd, &c, sizeof(c));
 		break;
 	default:
-		printk("do_not_aio - bad request type : %d\n", req->type);
+		printk("do_not_aio - bad request type : %d\n", aio->type);
 		err = -EINVAL;
 		break;
 	}
@@ -183,14 +203,14 @@ static int do_not_aio(struct aio_thread_req *req)
 
 static int not_aio_thread(void *arg)
 {
-	struct aio_thread_req req;
+	struct aio_context *aio;
 	struct aio_thread_reply reply;
 	int err;
 
 	signal(SIGWINCH, SIG_IGN);
 	while(1){
-		err = os_read_file(aio_req_fd_r, &req, sizeof(req));
-		if(err != sizeof(req)){
+		err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
+		if(err != sizeof(aio)){
 			if(err < 0)
 				printk("not_aio_thread - read failed, "
 				       "fd = %d, err = %d\n", aio_req_fd_r,
@@ -201,17 +221,34 @@ static int not_aio_thread(void *arg)
 			}
 			continue;
 		}
-		err = do_not_aio(&req);
-		reply = ((struct aio_thread_reply) { .data = req.aio,
-						     .err  = err });
-		err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
+	again:
+		err = do_not_aio(aio);
+
+		if(update_aio(aio, err))
+			goto again;
+
+		reply = ((struct aio_thread_reply) { .data = aio,
+						     .err  = aio->len });
+		err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
 		if(err != sizeof(reply))
 			printk("not_aio_thread - write failed, fd = %d, "
 			       "err = %d\n", aio_req_fd_r, -err);
 	}
 }
 
+static int submit_aio_24(struct aio_context *aio)
+{
+	int err;
+
+	err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
+	if(err == sizeof(aio))
+		err = 0;
+
+	return err;
+}
+
 static int aio_pid = -1;
+static int (*submit_proc)(struct aio_context *aio);
 
 static int init_aio_24(void)
 {
@@ -243,11 +280,33 @@ static int init_aio_24(void)
 #endif
 	printk("2.6 host AIO support not used - falling back to I/O "
 	       "thread\n");
+
+	submit_proc = submit_aio_24;
+
 	return 0;
 }
 
 #ifdef HAVE_AIO_ABI
 #define DEFAULT_24_AIO 0
+static int submit_aio_26(struct aio_context *aio)
+{
+	struct aio_thread_reply reply;
+	int err;
+
+	err = do_aio(ctx, aio);
+	if(err){
+		reply = ((struct aio_thread_reply) { .data = aio,
+						     .err  = err });
+		err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
+		if(err != sizeof(reply))
+			printk("submit_aio_26 - write failed, "
+			       "fd = %d, err = %d\n", aio->reply_fd, -err);
+		else err = 0;
+	}
+
+	return err;
+}
+
 static int init_aio_26(void)
 {
 	unsigned long stack;
@@ -267,39 +326,22 @@ static int init_aio_26(void)
 	aio_pid = err;
 
 	printk("Using 2.6 host AIO\n");
-	return 0;
-}
-
-static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
-			 unsigned long long offset, struct aio_context *aio)
-{
-	struct aio_thread_reply reply;
-	int err;
 
-	err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
-	if(err){
-		reply = ((struct aio_thread_reply) { .data = aio,
-						     .err  = err });
-		err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
-		if(err != sizeof(reply))
-			printk("submit_aio_26 - write failed, "
-			       "fd = %d, err = %d\n", aio->reply_fd, -err);
-		else err = 0;
-	}
+	submit_proc = submit_aio_26;
 
-	return err;
+	return 0;
 }
 
 #else
 #define DEFAULT_24_AIO 1
-static int init_aio_26(void)
+static int submit_aio_26(struct aio_context *aio)
 {
 	return -ENOSYS;
 }
 
-static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
-			 unsigned long long offset, struct aio_context *aio)
+static int init_aio_26(void)
 {
+	submit_proc = submit_aio_26;
 	return -ENOSYS;
 }
 #endif
@@ -366,33 +408,7 @@ static void exit_aio(void)
 
 __uml_exitcall(exit_aio);
 
-static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
-			 unsigned long long offset, struct aio_context *aio)
+int submit_aio(struct aio_context *aio)
 {
-	struct aio_thread_req req = { .type	= type,
-				      .io_fd	= io_fd,
-				      .offset	= offset,
-				      .buf	= buf,
-				      .len	= len,
-				      .aio	= aio,
-				    };
-	int err;
-
-	err = os_write_file(aio_req_fd_w, &req, sizeof(req));
-	if(err == sizeof(req))
-		err = 0;
-
-	return err;
-}
-
-int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
-	       unsigned long long offset, int reply_fd,
-	       struct aio_context *aio)
-{
-	aio->reply_fd = reply_fd;
-	if(aio_24)
-		return submit_aio_24(type, io_fd, buf, len, offset, aio);
-	else {
-		return submit_aio_26(type, io_fd, buf, len, offset, aio);
-	}
+	return (*submit_proc)(aio);
 }