aboutsummaryrefslogtreecommitdiffstats
path: root/arch/um/drivers/ubd_kern.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um/drivers/ubd_kern.c')
-rw-r--r--arch/um/drivers/ubd_kern.c1669
1 files changed, 1669 insertions, 0 deletions
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
new file mode 100644
index 000000000000..4d8b165bfa48
--- /dev/null
+++ b/arch/um/drivers/ubd_kern.c
@@ -0,0 +1,1669 @@
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
52#include "2_5compat.h"
53#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
58enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP };
59
60struct io_thread_req {
61 enum ubd_req op;
62 int fds[2];
63 unsigned long offsets[2];
64 unsigned long long offset;
65 unsigned long length;
66 char *buffer;
67 int sectorsize;
68 unsigned long sector_mask;
69 unsigned long long cow_offset;
70 unsigned long bitmap_words[2];
71 int map_fd;
72 unsigned long long map_offset;
73 int error;
74};
75
76extern int open_ubd_file(char *file, struct openflags *openflags,
77 char **backing_file_out, int *bitmap_offset_out,
78 unsigned long *bitmap_len_out, int *data_offset_out,
79 int *create_cow_out);
80extern int create_cow_file(char *cow_file, char *backing_file,
81 struct openflags flags, int sectorsize,
82 int alignment, int *bitmap_offset_out,
83 unsigned long *bitmap_len_out,
84 int *data_offset_out);
85extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
86extern void do_io(struct io_thread_req *req);
87
/* Return 1 if bit number 'bit' is set in the byte array 'data', 0 if not.
 * Bits are numbered from the least significant bit of data[0] upwards.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	int bits = sizeof(data[0]) * 8;

	return (data[bit / bits] >> (bit % bits)) & 1;
}
98
/* Set bit number 'bit' in the byte array 'data', using the same numbering
 * as ubd_test_bit (least significant bit of data[0] is bit 0).
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	int bits = sizeof(data[0]) * 8;

	data[bit / bits] |= 1 << (bit % bits);
}
109/*End stuff from ubd_user.h*/
110
111#define DRIVER_NAME "uml-blkdev"
112
113static DEFINE_SPINLOCK(ubd_io_lock);
114static DEFINE_SPINLOCK(ubd_lock);
115
116static void (*do_ubd)(void);
117
118static int ubd_open(struct inode * inode, struct file * filp);
119static int ubd_release(struct inode * inode, struct file * file);
120static int ubd_ioctl(struct inode * inode, struct file * file,
121 unsigned int cmd, unsigned long arg);
122
123#define MAX_DEV (8)
124
125/* Changed in early boot */
126static int ubd_do_mmap = 0;
127#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
128
129static struct block_device_operations ubd_blops = {
130 .owner = THIS_MODULE,
131 .open = ubd_open,
132 .release = ubd_release,
133 .ioctl = ubd_ioctl,
134};
135
136/* Protected by the queue_lock */
137static request_queue_t *ubd_queue;
138
139/* Protected by ubd_lock */
140static int fake_major = MAJOR_NR;
141
142static struct gendisk *ubd_gendisk[MAX_DEV];
143static struct gendisk *fake_gendisk[MAX_DEV];
144
145#ifdef CONFIG_BLK_DEV_UBD_SYNC
146#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
147 .cl = 1 })
148#else
149#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
150 .cl = 1 })
151#endif
152
/* Not protected - changed only in ubd_setup_common, and then only
 * to enable O_SYNC.
 */
156static struct openflags global_openflags = OPEN_FLAGS;
157
/* State of the second file of a two-file (COW) device */
struct cow {
	/* Name of the second file, or NULL when the device has none */
	char *file;
	/* Descriptor returned by open_ubd_file() for 'file' */
	int fd;
	/* In-memory copy of the block bitmap, vmalloc'ed in ubd_open_dev() */
	unsigned long *bitmap;
	/* Size passed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* Offset of the bitmap within the file */
	int bitmap_offset;
	/* Offset of the data area within the file */
	int data_offset;
};
166
167struct ubd {
168 char *file;
169 int count;
170 int fd;
171 __u64 size;
172 struct openflags boot_openflags;
173 struct openflags openflags;
174 int no_cow;
175 struct cow cow;
176 struct platform_device pdev;
177
178 int map_writes;
179 int map_reads;
180 int nomap_writes;
181 int nomap_reads;
182 int write_maps;
183};
184
185#define DEFAULT_COW { \
186 .file = NULL, \
187 .fd = -1, \
188 .bitmap = NULL, \
189 .bitmap_offset = 0, \
190 .data_offset = 0, \
191}
192
193#define DEFAULT_UBD { \
194 .file = NULL, \
195 .count = 0, \
196 .fd = -1, \
197 .size = -1, \
198 .boot_openflags = OPEN_FLAGS, \
199 .openflags = OPEN_FLAGS, \
200 .no_cow = 0, \
201 .cow = DEFAULT_COW, \
202 .map_writes = 0, \
203 .map_reads = 0, \
204 .nomap_writes = 0, \
205 .nomap_reads = 0, \
206 .write_maps = 0, \
207}
208
209struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
210
211static int ubd0_init(void)
212{
213 struct ubd *dev = &ubd_dev[0];
214
215 if(dev->file == NULL)
216 dev->file = "root_fs";
217 return(0);
218}
219
220__initcall(ubd0_init);
221
222/* Only changed by fake_ide_setup which is a setup */
223static int fake_ide = 0;
224static struct proc_dir_entry *proc_ide_root = NULL;
225static struct proc_dir_entry *proc_ide = NULL;
226
227static void make_proc_ide(void)
228{
229 proc_ide_root = proc_mkdir("ide", NULL);
230 proc_ide = proc_mkdir("ide0", proc_ide_root);
231}
232
/* read_proc handler for the fake "media" entry.
 *
 * Copies "disk\n" into page and returns how many bytes, starting at off,
 * fit into count.  *eof is set when fewer than count bytes remain; *start
 * is only written when at least one byte is returned or count is not
 * positive.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int remaining;

	strcpy(page, "disk\n");
	remaining = strlen("disk\n");
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
249
250static void make_ide_entries(char *dev_name)
251{
252 struct proc_dir_entry *dir, *ent;
253 char name[64];
254
255 if(proc_ide_root == NULL) make_proc_ide();
256
257 dir = proc_mkdir(dev_name, proc_ide);
258 if(!dir) return;
259
260 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
261 if(!ent) return;
262 ent->nlink = 1;
263 ent->data = NULL;
264 ent->read_proc = proc_ide_read_media;
265 ent->write_proc = NULL;
266 sprintf(name,"ide0/%s", dev_name);
267 proc_symlink(dev_name, proc_ide_root, name);
268}
269
270static int fake_ide_setup(char *str)
271{
272 fake_ide = 1;
273 return(1);
274}
275
276__setup("fake_ide", fake_ide_setup);
277
278__uml_help(fake_ide_setup,
279"fake_ide\n"
280" Create ide0 entries that map onto ubd devices.\n\n"
281);
282
/* Parse a unit specifier at *ptr: either a number, as understood by
 * simple_strtoul with base 0, or a single letter 'a'..'h' standing for
 * 0..7.
 *
 * On success *ptr is advanced past the specifier and the unit is returned.
 * Returns -1, leaving *ptr alone, when neither form is present.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'h'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
301
302static int ubd_setup_common(char *str, int *index_out)
303{
304 struct ubd *dev;
305 struct openflags flags = global_openflags;
306 char *backing_file;
307 int n, err, i;
308
309 if(index_out) *index_out = -1;
310 n = *str;
311 if(n == '='){
312 char *end;
313 int major;
314
315 str++;
316 if(!strcmp(str, "mmap")){
317 CHOOSE_MODE(printk("mmap not supported by the ubd "
318 "driver in tt mode\n"),
319 ubd_do_mmap = 1);
320 return(0);
321 }
322
323 if(!strcmp(str, "sync")){
324 global_openflags = of_sync(global_openflags);
325 return(0);
326 }
327 major = simple_strtoul(str, &end, 0);
328 if((*end != '\0') || (end == str)){
329 printk(KERN_ERR
330 "ubd_setup : didn't parse major number\n");
331 return(1);
332 }
333
334 err = 1;
335 spin_lock(&ubd_lock);
336 if(fake_major != MAJOR_NR){
337 printk(KERN_ERR "Can't assign a fake major twice\n");
338 goto out1;
339 }
340
341 fake_major = major;
342
343 printk(KERN_INFO "Setting extra ubd major number to %d\n",
344 major);
345 err = 0;
346 out1:
347 spin_unlock(&ubd_lock);
348 return(err);
349 }
350
351 n = parse_unit(&str);
352 if(n < 0){
353 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
354 "'%s'\n", str);
355 return(1);
356 }
357 if(n >= MAX_DEV){
358 printk(KERN_ERR "ubd_setup : index %d out of range "
359 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
360 return(1);
361 }
362
363 err = 1;
364 spin_lock(&ubd_lock);
365
366 dev = &ubd_dev[n];
367 if(dev->file != NULL){
368 printk(KERN_ERR "ubd_setup : device already configured\n");
369 goto out;
370 }
371
372 if (index_out)
373 *index_out = n;
374
375 for (i = 0; i < 4; i++) {
376 switch (*str) {
377 case 'r':
378 flags.w = 0;
379 break;
380 case 's':
381 flags.s = 1;
382 break;
383 case 'd':
384 dev->no_cow = 1;
385 break;
386 case '=':
387 str++;
388 goto break_loop;
389 default:
390 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
391 goto out;
392 }
393 str++;
394 }
395
396 if (*str == '=')
397 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
398 else
399 printk(KERN_ERR "ubd_setup : Expected '='\n");
400 goto out;
401
402break_loop:
403 err = 0;
404 backing_file = strchr(str, ',');
405
406 if (!backing_file) {
407 backing_file = strchr(str, ':');
408 }
409
410 if(backing_file){
411 if(dev->no_cow)
412 printk(KERN_ERR "Can't specify both 'd' and a "
413 "cow file\n");
414 else {
415 *backing_file = '\0';
416 backing_file++;
417 }
418 }
419 dev->file = str;
420 dev->cow.file = backing_file;
421 dev->boot_openflags = flags;
422out:
423 spin_unlock(&ubd_lock);
424 return(err);
425}
426
427static int ubd_setup(char *str)
428{
429 ubd_setup_common(str, NULL);
430 return(1);
431}
432
433__setup("ubd", ubd_setup);
434__uml_help(ubd_setup,
435"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
436" This is used to associate a device with a file in the underlying\n"
437" filesystem. When specifying two filenames, the first one is the\n"
438" COW name and the second is the backing file name. As separator you can\n"
439" use either a ':' or a ',': the first one allows writing things like;\n"
440" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
441" while with a ',' the shell would not expand the 2nd '~'.\n"
442" When using only one filename, UML will detect whether to thread it like\n"
443" a COW file or a backing file. To override this detection, add the 'd'\n"
444" flag:\n"
445" ubd0d=BackingFile\n"
446" Usually, there is a filesystem in the file, but \n"
447" that's not required. Swap devices containing swap files can be\n"
448" specified like this. Also, a file which doesn't contain a\n"
449" filesystem can have its contents read in the virtual \n"
450" machine by running 'dd' on the device. <n> must be in the range\n"
451" 0 to 7. Appending an 'r' to the number will cause that device\n"
452" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
453" an 's' will cause data to be written to disk on the host immediately.\n\n"
454);
455
/* Catches the "udb" misspelling of "ubd" and complains in the boot log */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a "
	       "ubd -> udb TYPO\n", str);

	return 1;
}
462
463__setup("udb", udb_setup);
464__uml_help(udb_setup,
465"udb\n"
466" This option is here solely to catch ubd -> udb typos, which can be\n\n"
467" to impossible to catch visually unless you specifically look for\n\n"
468" them. The only result of any option starting with 'udb' is an error\n\n"
469" in the boot output.\n\n"
470);
471
472static int fakehd_set = 0;
473static int fakehd(char *str)
474{
475 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
476 fakehd_set = 1;
477 return 1;
478}
479
480__setup("fakehd", fakehd);
481__uml_help(fakehd,
482"fakehd\n"
483" Change the ubd device name to \"hd\".\n\n"
484);
485
486static void do_ubd_request(request_queue_t * q);
487
488/* Only changed by ubd_init, which is an initcall. */
489int thread_fd = -1;
490
491/* Changed by ubd_handler, which is serialized because interrupts only
492 * happen on CPU 0.
493 */
494int intr_count = 0;
495
496/* call ubd_finish if you need to serialize */
497static void __ubd_finish(struct request *req, int error)
498{
499 int nsect;
500
501 if(error){
502 end_request(req, 0);
503 return;
504 }
505 nsect = req->current_nr_sectors;
506 req->sector += nsect;
507 req->buffer += nsect << 9;
508 req->errors = 0;
509 req->nr_sectors -= nsect;
510 req->current_nr_sectors = 0;
511 end_request(req, 1);
512}
513
514static inline void ubd_finish(struct request *req, int error)
515{
516 spin_lock(&ubd_io_lock);
517 __ubd_finish(req, error);
518 spin_unlock(&ubd_io_lock);
519}
520
521/* Called without ubd_io_lock held */
522static void ubd_handler(void)
523{
524 struct io_thread_req req;
525 struct request *rq = elv_next_request(ubd_queue);
526 int n, err;
527
528 do_ubd = NULL;
529 intr_count++;
530 n = os_read_file(thread_fd, &req, sizeof(req));
531 if(n != sizeof(req)){
532 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
533 "err = %d\n", os_getpid(), -n);
534 spin_lock(&ubd_io_lock);
535 end_request(rq, 0);
536 spin_unlock(&ubd_io_lock);
537 return;
538 }
539
540 if((req.op != UBD_MMAP) &&
541 ((req.offset != ((__u64) (rq->sector)) << 9) ||
542 (req.length != (rq->current_nr_sectors) << 9)))
543 panic("I/O op mismatch");
544
545 if(req.map_fd != -1){
546 err = physmem_subst_mapping(req.buffer, req.map_fd,
547 req.map_offset, 1);
548 if(err)
549 printk("ubd_handler - physmem_subst_mapping failed, "
550 "err = %d\n", -err);
551 }
552
553 ubd_finish(rq, req.error);
554 reactivate_fd(thread_fd, UBD_IRQ);
555 do_ubd_request(ubd_queue);
556}
557
558static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
559{
560 ubd_handler();
561 return(IRQ_HANDLED);
562}
563
564/* Only changed by ubd_init, which is an initcall. */
565static int io_pid = -1;
566
567void kill_io_thread(void)
568{
569 if(io_pid != -1)
570 os_kill_process(io_pid, 1);
571}
572
573__uml_exitcall(kill_io_thread);
574
575static int ubd_file_size(struct ubd *dev, __u64 *size_out)
576{
577 char *file;
578
579 file = dev->cow.file ? dev->cow.file : dev->file;
580 return(os_file_size(file, size_out));
581}
582
583static void ubd_close(struct ubd *dev)
584{
585 if(ubd_do_mmap)
586 physmem_forget_descriptor(dev->fd);
587 os_close_file(dev->fd);
588 if(dev->cow.file == NULL)
589 return;
590
591 if(ubd_do_mmap)
592 physmem_forget_descriptor(dev->cow.fd);
593 os_close_file(dev->cow.fd);
594 vfree(dev->cow.bitmap);
595 dev->cow.bitmap = NULL;
596}
597
598static int ubd_open_dev(struct ubd *dev)
599{
600 struct openflags flags;
601 char **back_ptr;
602 int err, create_cow, *create_ptr;
603
604 dev->openflags = dev->boot_openflags;
605 create_cow = 0;
606 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
607 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
608 dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
609 &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
610 &dev->cow.data_offset, create_ptr);
611
612 if((dev->fd == -ENOENT) && create_cow){
613 dev->fd = create_cow_file(dev->file, dev->cow.file,
614 dev->openflags, 1 << 9, PAGE_SIZE,
615 &dev->cow.bitmap_offset,
616 &dev->cow.bitmap_len,
617 &dev->cow.data_offset);
618 if(dev->fd >= 0){
619 printk(KERN_INFO "Creating \"%s\" as COW file for "
620 "\"%s\"\n", dev->file, dev->cow.file);
621 }
622 }
623
624 if(dev->fd < 0){
625 printk("Failed to open '%s', errno = %d\n", dev->file,
626 -dev->fd);
627 return(dev->fd);
628 }
629
630 if(dev->cow.file != NULL){
631 err = -ENOMEM;
632 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
633 if(dev->cow.bitmap == NULL){
634 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
635 goto error;
636 }
637 flush_tlb_kernel_vm();
638
639 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
640 dev->cow.bitmap_offset,
641 dev->cow.bitmap_len);
642 if(err < 0)
643 goto error;
644
645 flags = dev->openflags;
646 flags.w = 0;
647 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
648 NULL, NULL);
649 if(err < 0) goto error;
650 dev->cow.fd = err;
651 }
652 return(0);
653 error:
654 os_close_file(dev->fd);
655 return(err);
656}
657
658static int ubd_new_disk(int major, u64 size, int unit,
659 struct gendisk **disk_out)
660
661{
662 struct gendisk *disk;
663 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
664 int err;
665
666 disk = alloc_disk(1 << UBD_SHIFT);
667 if(disk == NULL)
668 return(-ENOMEM);
669
670 disk->major = major;
671 disk->first_minor = unit << UBD_SHIFT;
672 disk->fops = &ubd_blops;
673 set_capacity(disk, size / 512);
674 if(major == MAJOR_NR){
675 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
676 sprintf(disk->devfs_name, "ubd/disc%d", unit);
677 sprintf(from, "ubd/%d", unit);
678 sprintf(to, "disc%d/disc", unit);
679 err = devfs_mk_symlink(from, to);
680 if(err)
681 printk("ubd_new_disk failed to make link from %s to "
682 "%s, error = %d\n", from, to, err);
683 }
684 else {
685 sprintf(disk->disk_name, "ubd_fake%d", unit);
686 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
687 }
688
689 /* sysfs register (not for ide fake devices) */
690 if (major == MAJOR_NR) {
691 ubd_dev[unit].pdev.id = unit;
692 ubd_dev[unit].pdev.name = DRIVER_NAME;
693 platform_device_register(&ubd_dev[unit].pdev);
694 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
695 }
696
697 disk->private_data = &ubd_dev[unit];
698 disk->queue = ubd_queue;
699 add_disk(disk);
700
701 *disk_out = disk;
702 return 0;
703}
704
/* Round n up to the next multiple of the 512-byte sector size.  The
 * argument is parenthesized so that any expression may be passed, and the
 * mask is built from an unsigned constant rather than by shifting -1.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
706
707static int ubd_add(int n)
708{
709 struct ubd *dev = &ubd_dev[n];
710 int err;
711
712 if(dev->file == NULL)
713 return(-ENODEV);
714
715 if (ubd_open_dev(dev))
716 return(-ENODEV);
717
718 err = ubd_file_size(dev, &dev->size);
719 if(err < 0)
720 return(err);
721
722 dev->size = ROUND_BLOCK(dev->size);
723
724 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
725 if(err)
726 return(err);
727
728 if(fake_major != MAJOR_NR)
729 ubd_new_disk(fake_major, dev->size, n,
730 &fake_gendisk[n]);
731
732 /* perhaps this should also be under the "if (fake_major)" above */
733 /* using the fake_disk->disk_name and also the fakehd_set name */
734 if (fake_ide)
735 make_ide_entries(ubd_gendisk[n]->disk_name);
736
737 ubd_close(dev);
738 return 0;
739}
740
741static int ubd_config(char *str)
742{
743 int n, err;
744
745 str = uml_strdup(str);
746 if(str == NULL){
747 printk(KERN_ERR "ubd_config failed to strdup string\n");
748 return(1);
749 }
750 err = ubd_setup_common(str, &n);
751 if(err){
752 kfree(str);
753 return(-1);
754 }
755 if(n == -1) return(0);
756
757 spin_lock(&ubd_lock);
758 err = ubd_add(n);
759 if(err)
760 ubd_dev[n].file = NULL;
761 spin_unlock(&ubd_lock);
762
763 return(err);
764}
765
766static int ubd_get_config(char *name, char *str, int size, char **error_out)
767{
768 struct ubd *dev;
769 int n, len = 0;
770
771 n = parse_unit(&name);
772 if((n >= MAX_DEV) || (n < 0)){
773 *error_out = "ubd_get_config : device number out of range";
774 return(-1);
775 }
776
777 dev = &ubd_dev[n];
778 spin_lock(&ubd_lock);
779
780 if(dev->file == NULL){
781 CONFIG_CHUNK(str, size, len, "", 1);
782 goto out;
783 }
784
785 CONFIG_CHUNK(str, size, len, dev->file, 0);
786
787 if(dev->cow.file != NULL){
788 CONFIG_CHUNK(str, size, len, ",", 0);
789 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
790 }
791 else CONFIG_CHUNK(str, size, len, "", 1);
792
793 out:
794 spin_unlock(&ubd_lock);
795 return(len);
796}
797
798static int ubd_remove(char *str)
799{
800 struct ubd *dev;
801 int n, err = -ENODEV;
802
803 n = parse_unit(&str);
804
805 if((n < 0) || (n >= MAX_DEV))
806 return(err);
807
808 dev = &ubd_dev[n];
809 if(dev->count > 0)
810 return(-EBUSY); /* you cannot remove a open disk */
811
812 err = 0;
813 spin_lock(&ubd_lock);
814
815 if(ubd_gendisk[n] == NULL)
816 goto out;
817
818 del_gendisk(ubd_gendisk[n]);
819 put_disk(ubd_gendisk[n]);
820 ubd_gendisk[n] = NULL;
821
822 if(fake_gendisk[n] != NULL){
823 del_gendisk(fake_gendisk[n]);
824 put_disk(fake_gendisk[n]);
825 fake_gendisk[n] = NULL;
826 }
827
828 platform_device_unregister(&dev->pdev);
829 *dev = ((struct ubd) DEFAULT_UBD);
830 err = 0;
831 out:
832 spin_unlock(&ubd_lock);
833 return(err);
834}
835
836static struct mc_device ubd_mc = {
837 .name = "ubd",
838 .config = ubd_config,
839 .get_config = ubd_get_config,
840 .remove = ubd_remove,
841};
842
843static int ubd_mc_init(void)
844{
845 mconsole_register_dev(&ubd_mc);
846 return 0;
847}
848
849__initcall(ubd_mc_init);
850
851static struct device_driver ubd_driver = {
852 .name = DRIVER_NAME,
853 .bus = &platform_bus_type,
854};
855
856int ubd_init(void)
857{
858 int i;
859
860 devfs_mk_dir("ubd");
861 if (register_blkdev(MAJOR_NR, "ubd"))
862 return -1;
863
864 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
865 if (!ubd_queue) {
866 unregister_blkdev(MAJOR_NR, "ubd");
867 return -1;
868 }
869
870 if (fake_major != MAJOR_NR) {
871 char name[sizeof("ubd_nnn\0")];
872
873 snprintf(name, sizeof(name), "ubd_%d", fake_major);
874 devfs_mk_dir(name);
875 if (register_blkdev(fake_major, "ubd"))
876 return -1;
877 }
878 driver_register(&ubd_driver);
879 for (i = 0; i < MAX_DEV; i++)
880 ubd_add(i);
881 return 0;
882}
883
884late_initcall(ubd_init);
885
886int ubd_driver_init(void){
887 unsigned long stack;
888 int err;
889
890 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
891 if(global_openflags.s){
892 printk(KERN_INFO "ubd: Synchronous mode\n");
893 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
894 * enough. So use anyway the io thread. */
895 }
896 stack = alloc_stack(0, 0);
897 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
898 &thread_fd);
899 if(io_pid < 0){
900 printk(KERN_ERR
901 "ubd : Failed to start I/O thread (errno = %d) - "
902 "falling back to synchronous I/O\n", -io_pid);
903 io_pid = -1;
904 return(0);
905 }
906 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
907 SA_INTERRUPT, "ubd", ubd_dev);
908 if(err != 0)
909 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
910 return(err);
911}
912
913device_initcall(ubd_driver_init);
914
915static int ubd_open(struct inode *inode, struct file *filp)
916{
917 struct gendisk *disk = inode->i_bdev->bd_disk;
918 struct ubd *dev = disk->private_data;
919 int err = 0;
920
921 if(dev->count == 0){
922 err = ubd_open_dev(dev);
923 if(err){
924 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
925 disk->disk_name, dev->file, -err);
926 goto out;
927 }
928 }
929 dev->count++;
930 if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
931 if(--dev->count == 0) ubd_close(dev);
932 err = -EROFS;
933 }
934 out:
935 return(err);
936}
937
938static int ubd_release(struct inode * inode, struct file * file)
939{
940 struct gendisk *disk = inode->i_bdev->bd_disk;
941 struct ubd *dev = disk->private_data;
942
943 if(--dev->count == 0)
944 ubd_close(dev);
945 return(0);
946}
947
948static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
949 __u64 *cow_offset, unsigned long *bitmap,
950 __u64 bitmap_offset, unsigned long *bitmap_words,
951 __u64 bitmap_len)
952{
953 __u64 sector = io_offset >> 9;
954 int i, update_bitmap = 0;
955
956 for(i = 0; i < length >> 9; i++){
957 if(cow_mask != NULL)
958 ubd_set_bit(i, (unsigned char *) cow_mask);
959 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
960 continue;
961
962 update_bitmap = 1;
963 ubd_set_bit(sector + i, (unsigned char *) bitmap);
964 }
965
966 if(!update_bitmap)
967 return;
968
969 *cow_offset = sector / (sizeof(unsigned long) * 8);
970
971 /* This takes care of the case where we're exactly at the end of the
972 * device, and *cow_offset + 1 is off the end. So, just back it up
973 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
974 * for the original diagnosis.
975 */
976 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
977 sizeof(unsigned long) - 1))
978 (*cow_offset)--;
979
980 bitmap_words[0] = bitmap[*cow_offset];
981 bitmap_words[1] = bitmap[*cow_offset + 1];
982
983 *cow_offset *= sizeof(unsigned long);
984 *cow_offset += bitmap_offset;
985}
986
987static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
988 __u64 bitmap_offset, __u64 bitmap_len)
989{
990 __u64 sector = req->offset >> 9;
991 int i;
992
993 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
994 panic("Operation too long");
995
996 if(req->op == UBD_READ) {
997 for(i = 0; i < req->length >> 9; i++){
998 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
999 ubd_set_bit(i, (unsigned char *)
1000 &req->sector_mask);
1001 }
1002 }
1003 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1004 &req->cow_offset, bitmap, bitmap_offset,
1005 req->bitmap_words, bitmap_len);
1006}
1007
1008static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset)
1009{
1010 __u64 sector;
1011 unsigned char *bitmap;
1012 int bit, i;
1013
1014 /* mmap must have been requested on the command line */
1015 if(!ubd_do_mmap)
1016 return(-1);
1017
1018 /* The buffer must be page aligned */
1019 if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0)
1020 return(-1);
1021
1022 /* The request must be a page long */
1023 if((req->current_nr_sectors << 9) != PAGE_SIZE)
1024 return(-1);
1025
1026 if(dev->cow.file == NULL)
1027 return(dev->fd);
1028
1029 sector = offset >> 9;
1030 bitmap = (unsigned char *) dev->cow.bitmap;
1031 bit = ubd_test_bit(sector, bitmap);
1032
1033 for(i = 1; i < req->current_nr_sectors; i++){
1034 if(ubd_test_bit(sector + i, bitmap) != bit)
1035 return(-1);
1036 }
1037
1038 if(bit || (rq_data_dir(req) == WRITE))
1039 offset += dev->cow.data_offset;
1040
1041 /* The data on disk must be page aligned */
1042 if((offset % UBD_MMAP_BLOCK_SIZE) != 0)
1043 return(-1);
1044
1045 return(bit ? dev->fd : dev->cow.fd);
1046}
1047
1048static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset,
1049 struct request *req,
1050 struct io_thread_req *io_req)
1051{
1052 int err;
1053
1054 if(rq_data_dir(req) == WRITE){
1055 /* Writes are almost no-ops since the new data is already in the
1056 * host page cache
1057 */
1058 dev->map_writes++;
1059 if(dev->cow.file != NULL)
1060 cowify_bitmap(io_req->offset, io_req->length,
1061 &io_req->sector_mask, &io_req->cow_offset,
1062 dev->cow.bitmap, dev->cow.bitmap_offset,
1063 io_req->bitmap_words,
1064 dev->cow.bitmap_len);
1065 }
1066 else {
1067 int w;
1068
1069 if((dev->cow.file != NULL) && (fd == dev->cow.fd))
1070 w = 0;
1071 else w = dev->openflags.w;
1072
1073 if((dev->cow.file != NULL) && (fd == dev->fd))
1074 offset += dev->cow.data_offset;
1075
1076 err = physmem_subst_mapping(req->buffer, fd, offset, w);
1077 if(err){
1078 printk("physmem_subst_mapping failed, err = %d\n",
1079 -err);
1080 return(1);
1081 }
1082 dev->map_reads++;
1083 }
1084 io_req->op = UBD_MMAP;
1085 io_req->buffer = req->buffer;
1086 return(0);
1087}
1088
1089/* Called with ubd_io_lock held */
1090static int prepare_request(struct request *req, struct io_thread_req *io_req)
1091{
1092 struct gendisk *disk = req->rq_disk;
1093 struct ubd *dev = disk->private_data;
1094 __u64 offset;
1095 int len, fd;
1096
1097 if(req->rq_status == RQ_INACTIVE) return(1);
1098
1099 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
1100 printk("Write attempted on readonly ubd device %s\n",
1101 disk->disk_name);
1102 end_request(req, 0);
1103 return(1);
1104 }
1105
1106 offset = ((__u64) req->sector) << 9;
1107 len = req->current_nr_sectors << 9;
1108
1109 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1110 io_req->fds[1] = dev->fd;
1111 io_req->map_fd = -1;
1112 io_req->cow_offset = -1;
1113 io_req->offset = offset;
1114 io_req->length = len;
1115 io_req->error = 0;
1116 io_req->sector_mask = 0;
1117
1118 fd = mmap_fd(req, dev, io_req->offset);
1119 if(fd > 0){
1120 /* If mmapping is otherwise OK, but the first access to the
1121 * page is a write, then it's not mapped in yet. So we have
1122 * to write the data to disk first, then we can map the disk
1123 * page in and continue normally from there.
1124 */
1125 if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){
1126 io_req->map_fd = dev->fd;
1127 io_req->map_offset = io_req->offset +
1128 dev->cow.data_offset;
1129 dev->write_maps++;
1130 }
1131 else return(prepare_mmap_request(dev, fd, io_req->offset, req,
1132 io_req));
1133 }
1134
1135 if(rq_data_dir(req) == READ)
1136 dev->nomap_reads++;
1137 else dev->nomap_writes++;
1138
1139 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1140 io_req->offsets[0] = 0;
1141 io_req->offsets[1] = dev->cow.data_offset;
1142 io_req->buffer = req->buffer;
1143 io_req->sectorsize = 1 << 9;
1144
1145 if(dev->cow.file != NULL)
1146 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1147 dev->cow.bitmap_len);
1148
1149 return(0);
1150}
1151
1152/* Called with ubd_io_lock held */
1153static void do_ubd_request(request_queue_t *q)
1154{
1155 struct io_thread_req io_req;
1156 struct request *req;
1157 int err, n;
1158
1159 if(thread_fd == -1){
1160 while((req = elv_next_request(q)) != NULL){
1161 err = prepare_request(req, &io_req);
1162 if(!err){
1163 do_io(&io_req);
1164 __ubd_finish(req, io_req.error);
1165 }
1166 }
1167 }
1168 else {
1169 if(do_ubd || (req = elv_next_request(q)) == NULL)
1170 return;
1171 err = prepare_request(req, &io_req);
1172 if(!err){
1173 do_ubd = ubd_handler;
1174 n = os_write_file(thread_fd, (char *) &io_req,
1175 sizeof(io_req));
1176 if(n != sizeof(io_req))
1177 printk("write to io thread failed, "
1178 "errno = %d\n", -n);
1179 }
1180 }
1181}
1182
1183static int ubd_ioctl(struct inode * inode, struct file * file,
1184 unsigned int cmd, unsigned long arg)
1185{
1186 struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1187 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1188 struct hd_driveid ubd_id = {
1189 .cyls = 0,
1190 .heads = 128,
1191 .sectors = 32,
1192 };
1193
1194 switch (cmd) {
1195 struct hd_geometry g;
1196 struct cdrom_volctrl volume;
1197 case HDIO_GETGEO:
1198 if(!loc) return(-EINVAL);
1199 g.heads = 128;
1200 g.sectors = 32;
1201 g.cylinders = dev->size / (128 * 32 * 512);
1202 g.start = get_start_sect(inode->i_bdev);
1203 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1204
1205 case HDIO_GET_IDENTITY:
1206 ubd_id.cyls = dev->size / (128 * 32 * 512);
1207 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1208 sizeof(ubd_id)))
1209 return(-EFAULT);
1210 return(0);
1211
1212 case CDROMVOLREAD:
1213 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1214 return(-EFAULT);
1215 volume.channel0 = 255;
1216 volume.channel1 = 255;
1217 volume.channel2 = 255;
1218 volume.channel3 = 255;
1219 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1220 return(-EFAULT);
1221 return(0);
1222 }
1223 return(-EINVAL);
1224}
1225
1226static int ubd_check_remapped(int fd, unsigned long address, int is_write,
1227 __u64 offset)
1228{
1229 __u64 bitmap_offset;
1230 unsigned long new_bitmap[2];
1231 int i, err, n;
1232
1233 /* If it's not a write access, we can't do anything about it */
1234 if(!is_write)
1235 return(0);
1236
1237 /* We have a write */
1238 for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){
1239 struct ubd *dev = &ubd_dev[i];
1240
1241 if((dev->fd != fd) && (dev->cow.fd != fd))
1242 continue;
1243
1244 /* It's a write to a ubd device */
1245
1246 if(!dev->openflags.w){
1247 /* It's a write access on a read-only device - probably
1248 * shouldn't happen. If the kernel is trying to change
1249 * something with no intention of writing it back out,
1250 * then this message will clue us in that this needs
1251 * fixing
1252 */
1253 printk("Write access to mapped page from readonly ubd "
1254 "device %d\n", i);
1255 return(0);
1256 }
1257
1258 /* It's a write to a writeable ubd device - it must be COWed
1259 * because, otherwise, the page would have been mapped in
1260 * writeable
1261 */
1262
1263 if(!dev->cow.file)
1264 panic("Write fault on writeable non-COW ubd device %d",
1265 i);
1266
1267 /* It should also be an access to the backing file since the
1268 * COW pages should be mapped in read-write
1269 */
1270
1271 if(fd == dev->fd)
1272 panic("Write fault on a backing page of ubd "
1273 "device %d\n", i);
1274
1275 /* So, we do the write, copying the backing data to the COW
1276 * file...
1277 */
1278
1279 err = os_seek_file(dev->fd, offset + dev->cow.data_offset);
1280 if(err < 0)
1281 panic("Couldn't seek to %lld in COW file of ubd "
1282 "device %d, err = %d",
1283 offset + dev->cow.data_offset, i, -err);
1284
1285 n = os_write_file(dev->fd, (void *) address, PAGE_SIZE);
1286 if(n != PAGE_SIZE)
1287 panic("Couldn't copy data to COW file of ubd "
1288 "device %d, err = %d", i, -n);
1289
1290 /* ... updating the COW bitmap... */
1291
1292 cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset,
1293 dev->cow.bitmap, dev->cow.bitmap_offset,
1294 new_bitmap, dev->cow.bitmap_len);
1295
1296 err = os_seek_file(dev->fd, bitmap_offset);
1297 if(err < 0)
1298 panic("Couldn't seek to %lld in COW file of ubd "
1299 "device %d, err = %d", bitmap_offset, i, -err);
1300
1301 n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap));
1302 if(n != sizeof(new_bitmap))
1303 panic("Couldn't update bitmap of ubd device %d, "
1304 "err = %d", i, -n);
1305
1306 /* Maybe we can map the COW page in, and maybe we can't. If
1307 * it is a pre-V3 COW file, we can't, since the alignment will
1308 * be wrong. If it is a V3 or later COW file which has been
1309 * moved to a system with a larger page size, then maybe we
1310 * can't, depending on the exact location of the page.
1311 */
1312
1313 offset += dev->cow.data_offset;
1314
1315 /* Remove the remapping, putting the original anonymous page
1316 * back. If the COW file can be mapped in, that is done.
1317 * Otherwise, the COW page is read in.
1318 */
1319
1320 if(!physmem_remove_mapping((void *) address))
1321 panic("Address 0x%lx not remapped by ubd device %d",
1322 address, i);
1323 if((offset % UBD_MMAP_BLOCK_SIZE) == 0)
1324 physmem_subst_mapping((void *) address, dev->fd,
1325 offset, 1);
1326 else {
1327 err = os_seek_file(dev->fd, offset);
1328 if(err < 0)
1329 panic("Couldn't seek to %lld in COW file of "
1330 "ubd device %d, err = %d", offset, i,
1331 -err);
1332
1333 n = os_read_file(dev->fd, (void *) address, PAGE_SIZE);
1334 if(n != PAGE_SIZE)
1335 panic("Failed to read page from offset %llx of "
1336 "COW file of ubd device %d, err = %d",
1337 offset, i, -n);
1338 }
1339
1340 return(1);
1341 }
1342
1343 /* It's not a write on a ubd device */
1344 return(0);
1345}
1346
1347static struct remapper ubd_remapper = {
1348 .list = LIST_HEAD_INIT(ubd_remapper.list),
1349 .proc = ubd_check_remapped,
1350};
1351
1352static int ubd_remapper_setup(void)
1353{
1354 if(ubd_do_mmap)
1355 register_remapper(&ubd_remapper);
1356
1357 return(0);
1358}
1359
1360__initcall(ubd_remapper_setup);
1361
1362static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1363{
1364 struct uml_stat buf1, buf2;
1365 int err;
1366
1367 if(from_cmdline == NULL) return(1);
1368 if(!strcmp(from_cmdline, from_cow)) return(1);
1369
1370 err = os_stat_file(from_cmdline, &buf1);
1371 if(err < 0){
1372 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1373 return(1);
1374 }
1375 err = os_stat_file(from_cow, &buf2);
1376 if(err < 0){
1377 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1378 return(1);
1379 }
1380 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1381 return(1);
1382
1383 printk("Backing file mismatch - \"%s\" requested,\n"
1384 "\"%s\" specified in COW header of \"%s\"\n",
1385 from_cmdline, from_cow, cow);
1386 return(0);
1387}
1388
1389static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1390{
1391 unsigned long modtime;
1392 long long actual;
1393 int err;
1394
1395 err = os_file_modtime(file, &modtime);
1396 if(err < 0){
1397 printk("Failed to get modification time of backing file "
1398 "\"%s\", err = %d\n", file, -err);
1399 return(err);
1400 }
1401
1402 err = os_file_size(file, &actual);
1403 if(err < 0){
1404 printk("Failed to get size of backing file \"%s\", "
1405 "err = %d\n", file, -err);
1406 return(err);
1407 }
1408
1409 if(actual != size){
1410 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1411 * the typecast.*/
1412 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1413 "file\n", (unsigned long long) size, actual);
1414 return(-EINVAL);
1415 }
1416 if(modtime != mtime){
1417 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1418 "file\n", mtime, modtime);
1419 return(-EINVAL);
1420 }
1421 return(0);
1422}
1423
/*
 * Read len bytes at the given offset of fd into buf.
 *
 * Returns 0 on success or the negative error from the seek or the read.
 * A read that returns fewer than len bytes is not treated as an error.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if(ret >= 0)
		ret = os_read_file(fd, buf, len);

	return((ret < 0) ? ret : 0);
}
1438
1439int open_ubd_file(char *file, struct openflags *openflags,
1440 char **backing_file_out, int *bitmap_offset_out,
1441 unsigned long *bitmap_len_out, int *data_offset_out,
1442 int *create_cow_out)
1443{
1444 time_t mtime;
1445 unsigned long long size;
1446 __u32 version, align;
1447 char *backing_file;
1448 int fd, err, sectorsize, same, mode = 0644;
1449
1450 fd = os_open_file(file, *openflags, mode);
1451 if(fd < 0){
1452 if((fd == -ENOENT) && (create_cow_out != NULL))
1453 *create_cow_out = 1;
1454 if(!openflags->w ||
1455 ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1456 openflags->w = 0;
1457 fd = os_open_file(file, *openflags, mode);
1458 if(fd < 0)
1459 return(fd);
1460 }
1461
1462 err = os_lock_file(fd, openflags->w);
1463 if(err < 0){
1464 printk("Failed to lock '%s', err = %d\n", file, -err);
1465 goto out_close;
1466 }
1467
1468 if(backing_file_out == NULL) return(fd);
1469
1470 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1471 &size, &sectorsize, &align, bitmap_offset_out);
1472 if(err && (*backing_file_out != NULL)){
1473 printk("Failed to read COW header from COW file \"%s\", "
1474 "errno = %d\n", file, -err);
1475 goto out_close;
1476 }
1477 if(err) return(fd);
1478
1479 if(backing_file_out == NULL) return(fd);
1480
1481 same = same_backing_files(*backing_file_out, backing_file, file);
1482
1483 if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1484 printk("Switching backing file to '%s'\n", *backing_file_out);
1485 err = write_cow_header(file, fd, *backing_file_out,
1486 sectorsize, align, &size);
1487 if(err){
1488 printk("Switch failed, errno = %d\n", -err);
1489 return(err);
1490 }
1491 }
1492 else {
1493 *backing_file_out = backing_file;
1494 err = backing_file_mismatch(*backing_file_out, size, mtime);
1495 if(err) goto out_close;
1496 }
1497
1498 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1499 bitmap_len_out, data_offset_out);
1500
1501 return(fd);
1502 out_close:
1503 os_close_file(fd);
1504 return(err);
1505}
1506
1507int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1508 int sectorsize, int alignment, int *bitmap_offset_out,
1509 unsigned long *bitmap_len_out, int *data_offset_out)
1510{
1511 int err, fd;
1512
1513 flags.c = 1;
1514 fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1515 if(fd < 0){
1516 err = fd;
1517 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1518 -err);
1519 goto out;
1520 }
1521
1522 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1523 bitmap_offset_out, bitmap_len_out,
1524 data_offset_out);
1525 if(!err)
1526 return(fd);
1527 os_close_file(fd);
1528 out:
1529 return(err);
1530}
1531
1532static int update_bitmap(struct io_thread_req *req)
1533{
1534 int n;
1535
1536 if(req->cow_offset == -1)
1537 return(0);
1538
1539 n = os_seek_file(req->fds[1], req->cow_offset);
1540 if(n < 0){
1541 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1542 return(1);
1543 }
1544
1545 n = os_write_file(req->fds[1], &req->bitmap_words,
1546 sizeof(req->bitmap_words));
1547 if(n != sizeof(req->bitmap_words)){
1548 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1549 req->fds[1]);
1550 return(1);
1551 }
1552
1553 return(0);
1554}
1555
1556void do_io(struct io_thread_req *req)
1557{
1558 char *buf;
1559 unsigned long len;
1560 int n, nsectors, start, end, bit;
1561 int err;
1562 __u64 off;
1563
1564 if(req->op == UBD_MMAP){
1565 /* Touch the page to force the host to do any necessary IO to
1566 * get it into memory
1567 */
1568 n = *((volatile int *) req->buffer);
1569 req->error = update_bitmap(req);
1570 return;
1571 }
1572
1573 nsectors = req->length / req->sectorsize;
1574 start = 0;
1575 do {
1576 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1577 end = start;
1578 while((end < nsectors) &&
1579 (ubd_test_bit(end, (unsigned char *)
1580 &req->sector_mask) == bit))
1581 end++;
1582
1583 off = req->offset + req->offsets[bit] +
1584 start * req->sectorsize;
1585 len = (end - start) * req->sectorsize;
1586 buf = &req->buffer[start * req->sectorsize];
1587
1588 err = os_seek_file(req->fds[bit], off);
1589 if(err < 0){
1590 printk("do_io - lseek failed : err = %d\n", -err);
1591 req->error = 1;
1592 return;
1593 }
1594 if(req->op == UBD_READ){
1595 n = 0;
1596 do {
1597 buf = &buf[n];
1598 len -= n;
1599 n = os_read_file(req->fds[bit], buf, len);
1600 if (n < 0) {
1601 printk("do_io - read failed, err = %d "
1602 "fd = %d\n", -n, req->fds[bit]);
1603 req->error = 1;
1604 return;
1605 }
1606 } while((n < len) && (n != 0));
1607 if (n < len) memset(&buf[n], 0, len - n);
1608 }
1609 else {
1610 n = os_write_file(req->fds[bit], buf, len);
1611 if(n != len){
1612 printk("do_io - write failed err = %d "
1613 "fd = %d\n", -n, req->fds[bit]);
1614 req->error = 1;
1615 return;
1616 }
1617 }
1618
1619 start = end;
1620 } while(start < nsectors);
1621
1622 req->error = update_bitmap(req);
1623}
1624
1625/* Changed in start_io_thread, which is serialized by being called only
1626 * from ubd_init, which is an initcall.
1627 */
1628int kernel_fd = -1;
1629
1630/* Only changed by the io thread */
1631int io_count = 0;
1632
1633int io_thread(void *arg)
1634{
1635 struct io_thread_req req;
1636 int n;
1637
1638 ignore_sigwinch_sig();
1639 while(1){
1640 n = os_read_file(kernel_fd, &req, sizeof(req));
1641 if(n != sizeof(req)){
1642 if(n < 0)
1643 printk("io_thread - read failed, fd = %d, "
1644 "err = %d\n", kernel_fd, -n);
1645 else {
1646 printk("io_thread - short read, fd = %d, "
1647 "length = %d\n", kernel_fd, n);
1648 }
1649 continue;
1650 }
1651 io_count++;
1652 do_io(&req);
1653 n = os_write_file(kernel_fd, &req, sizeof(req));
1654 if(n != sizeof(req))
1655 printk("io_thread - write failed, fd = %d, err = %d\n",
1656 kernel_fd, -n);
1657 }
1658}
1659
1660/*
1661 * Overrides for Emacs so that we follow Linus's tabbing style.
1662 * Emacs will notice this stuff at the end of the file and automatically
1663 * adjust the settings for this buffer only. This must remain at the end
1664 * of the file.
1665 * ---------------------------------------------------------------------------
1666 * Local variables:
1667 * c-file-style: "linux"
1668 * End:
1669 */