diff options
Diffstat (limited to 'arch/um/drivers/ubd_kern.c')
-rw-r--r-- | arch/um/drivers/ubd_kern.c | 1669 |
1 files changed, 1669 insertions, 0 deletions
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c new file mode 100644 index 000000000000..4d8b165bfa48 --- /dev/null +++ b/arch/um/drivers/ubd_kern.c | |||
@@ -0,0 +1,1669 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) | ||
3 | * Licensed under the GPL | ||
4 | */ | ||
5 | |||
6 | /* 2001-09-28...2002-04-17 | ||
7 | * Partition stuff by James_McMechan@hotmail.com | ||
8 | * old style ubd by setting UBD_SHIFT to 0 | ||
9 | * 2002-09-27...2002-10-18 massive tinkering for 2.5 | ||
10 | * partitions have changed in 2.5 | ||
11 | * 2003-01-29 more tinkering for 2.5.59-1 | ||
12 | * This should now address the sysfs problems and has | ||
13 | * the symlink for devfs to allow for booting with | ||
14 | * the common /dev/ubd/discX/... names rather than | ||
15 | * only /dev/ubdN/discN this version also has lots of | ||
16 | * clean ups preparing for ubd-many. | ||
17 | * James McMechan | ||
18 | */ | ||
19 | |||
20 | #define MAJOR_NR UBD_MAJOR | ||
21 | #define UBD_SHIFT 4 | ||
22 | |||
23 | #include "linux/config.h" | ||
24 | #include "linux/module.h" | ||
25 | #include "linux/blkdev.h" | ||
26 | #include "linux/hdreg.h" | ||
27 | #include "linux/init.h" | ||
28 | #include "linux/devfs_fs_kernel.h" | ||
29 | #include "linux/cdrom.h" | ||
30 | #include "linux/proc_fs.h" | ||
31 | #include "linux/ctype.h" | ||
32 | #include "linux/capability.h" | ||
33 | #include "linux/mm.h" | ||
34 | #include "linux/vmalloc.h" | ||
35 | #include "linux/blkpg.h" | ||
36 | #include "linux/genhd.h" | ||
37 | #include "linux/spinlock.h" | ||
38 | #include "asm/segment.h" | ||
39 | #include "asm/uaccess.h" | ||
40 | #include "asm/irq.h" | ||
41 | #include "asm/types.h" | ||
42 | #include "asm/tlbflush.h" | ||
43 | #include "user_util.h" | ||
44 | #include "mem_user.h" | ||
45 | #include "kern_util.h" | ||
46 | #include "kern.h" | ||
47 | #include "mconsole_kern.h" | ||
48 | #include "init.h" | ||
49 | #include "irq_user.h" | ||
50 | #include "irq_kern.h" | ||
51 | #include "ubd_user.h" | ||
52 | #include "2_5compat.h" | ||
53 | #include "os.h" | ||
54 | #include "mem.h" | ||
55 | #include "mem_kern.h" | ||
56 | #include "cow.h" | ||
57 | |||
/* Kinds of operation carried by a struct io_thread_req. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_MMAP
};

/*
 * One I/O request record.  ubd_handler() reads a record of this type back
 * from thread_fd and looks at op, offset, length, buffer, map_fd,
 * map_offset and error; cowify_req() fills in sector_mask, cow_offset and
 * bitmap_words for copy-on-write devices.
 */
struct io_thread_req {
	enum ubd_req op;		/* requested operation */
	int fds[2];
	unsigned long offsets[2];
	unsigned long long offset;	/* byte offset of the transfer */
	unsigned long length;		/* transfer length in bytes */
	char *buffer;
	int sectorsize;
	unsigned long sector_mask;	/* one bit per sector of this request */
	unsigned long long cow_offset;	/* file offset of bitmap_words */
	unsigned long bitmap_words[2];	/* bitmap words to be written back */
	int map_fd;			/* -1 when no mapping is requested */
	unsigned long long map_offset;
	int error;			/* non-zero when the operation failed */
};
75 | |||
/* Helpers that are defined outside this file. */
extern int open_ubd_file(char *file, struct openflags *openflags,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);

extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
87 | |||
/*
 * Test bit number 'bit' in the byte array 'data'.  Bit 0 is the least
 * significant bit of data[0].  Returns 1 when the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_byte = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_byte;
	int shift = bit % bits_per_byte;

	return (data[index] >> shift) & 1;
}
98 | |||
/*
 * Set bit number 'bit' in the byte array 'data', using the same numbering
 * as ubd_test_bit() (bit 0 is the least significant bit of data[0]).
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_byte = sizeof(data[0]) * 8;
	unsigned char *byte = &data[bit / bits_per_byte];

	*byte |= 1 << (bit % bits_per_byte);
}
/*End stuff from ubd_user.h*/
110 | |||
/* Name used for both the platform devices and the device driver. */
#define DRIVER_NAME "uml-blkdev"

/* Handed to blk_init_queue() and taken around request completion. */
static DEFINE_SPINLOCK(ubd_io_lock);
/* Taken around changes to the ubd_dev configuration and fake_major. */
static DEFINE_SPINLOCK(ubd_lock);

/* Reset to NULL by ubd_handler(). */
static void (*do_ubd)(void);

/* Block device operations, defined further down. */
static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);

/* Number of entries in ubd_dev[], ubd_gendisk[] and fake_gendisk[]. */
#define MAX_DEV (8)

/* Changed in early boot (set by the "ubd=mmap" option). */
static int ubd_do_mmap;
#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
128 | |||
129 | static struct block_device_operations ubd_blops = { | ||
130 | .owner = THIS_MODULE, | ||
131 | .open = ubd_open, | ||
132 | .release = ubd_release, | ||
133 | .ioctl = ubd_ioctl, | ||
134 | }; | ||
135 | |||
136 | /* Protected by the queue_lock */ | ||
137 | static request_queue_t *ubd_queue; | ||
138 | |||
139 | /* Protected by ubd_lock */ | ||
140 | static int fake_major = MAJOR_NR; | ||
141 | |||
142 | static struct gendisk *ubd_gendisk[MAX_DEV]; | ||
143 | static struct gendisk *fake_gendisk[MAX_DEV]; | ||
144 | |||
145 | #ifdef CONFIG_BLK_DEV_UBD_SYNC | ||
146 | #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ | ||
147 | .cl = 1 }) | ||
148 | #else | ||
149 | #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ | ||
150 | .cl = 1 }) | ||
151 | #endif | ||
152 | |||
153 | /* Not protected - changed only in ubd_setup_common and then only to | ||
154 | * to enable O_SYNC. | ||
155 | */ | ||
156 | static struct openflags global_openflags = OPEN_FLAGS; | ||
157 | |||
/*
 * Copy-on-write state of one device.  'file' is the backing file name
 * recorded by ubd_setup_common(); ubd_open_dev() opens it as 'fd' and
 * loads 'bitmap' (bitmap_len bytes, found at bitmap_offset).
 */
struct cow {
	char *file;			/* NULL when the device has no backing file */
	int fd;
	unsigned long *bitmap;		/* vmalloc'd in ubd_open_dev() */
	unsigned long bitmap_len;
	int bitmap_offset;
	int data_offset;		/* added to I/O offsets in mmap_fd() */
};
166 | |||
167 | struct ubd { | ||
168 | char *file; | ||
169 | int count; | ||
170 | int fd; | ||
171 | __u64 size; | ||
172 | struct openflags boot_openflags; | ||
173 | struct openflags openflags; | ||
174 | int no_cow; | ||
175 | struct cow cow; | ||
176 | struct platform_device pdev; | ||
177 | |||
178 | int map_writes; | ||
179 | int map_reads; | ||
180 | int nomap_writes; | ||
181 | int nomap_reads; | ||
182 | int write_maps; | ||
183 | }; | ||
184 | |||
/* Initializer for a struct cow with no backing file attached. */
#define DEFAULT_COW { \
	.file		= NULL, \
	.fd		= -1, \
	.bitmap		= NULL, \
	.bitmap_len	= 0, \
	.bitmap_offset	= 0, \
	.data_offset	= 0, \
}
192 | |||
193 | #define DEFAULT_UBD { \ | ||
194 | .file = NULL, \ | ||
195 | .count = 0, \ | ||
196 | .fd = -1, \ | ||
197 | .size = -1, \ | ||
198 | .boot_openflags = OPEN_FLAGS, \ | ||
199 | .openflags = OPEN_FLAGS, \ | ||
200 | .no_cow = 0, \ | ||
201 | .cow = DEFAULT_COW, \ | ||
202 | .map_writes = 0, \ | ||
203 | .map_reads = 0, \ | ||
204 | .nomap_writes = 0, \ | ||
205 | .nomap_reads = 0, \ | ||
206 | .write_maps = 0, \ | ||
207 | } | ||
208 | |||
209 | struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; | ||
210 | |||
211 | static int ubd0_init(void) | ||
212 | { | ||
213 | struct ubd *dev = &ubd_dev[0]; | ||
214 | |||
215 | if(dev->file == NULL) | ||
216 | dev->file = "root_fs"; | ||
217 | return(0); | ||
218 | } | ||
219 | |||
220 | __initcall(ubd0_init); | ||
221 | |||
222 | /* Only changed by fake_ide_setup which is a setup */ | ||
223 | static int fake_ide = 0; | ||
224 | static struct proc_dir_entry *proc_ide_root = NULL; | ||
225 | static struct proc_dir_entry *proc_ide = NULL; | ||
226 | |||
227 | static void make_proc_ide(void) | ||
228 | { | ||
229 | proc_ide_root = proc_mkdir("ide", NULL); | ||
230 | proc_ide = proc_mkdir("ide0", proc_ide_root); | ||
231 | } | ||
232 | |||
/*
 * read_proc handler for the "media" entry: always reports "disk\n".
 *
 * The text is copied to 'page'.  When fewer than 'count' bytes remain
 * after 'off', *eof is set; if nothing remains 0 is returned and *start
 * is left untouched.  Otherwise *start points at page + off and the
 * number of bytes available (at most 'count') is returned.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		/* The caller's buffer is the limit; more may follow. */
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
249 | |||
250 | static void make_ide_entries(char *dev_name) | ||
251 | { | ||
252 | struct proc_dir_entry *dir, *ent; | ||
253 | char name[64]; | ||
254 | |||
255 | if(proc_ide_root == NULL) make_proc_ide(); | ||
256 | |||
257 | dir = proc_mkdir(dev_name, proc_ide); | ||
258 | if(!dir) return; | ||
259 | |||
260 | ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); | ||
261 | if(!ent) return; | ||
262 | ent->nlink = 1; | ||
263 | ent->data = NULL; | ||
264 | ent->read_proc = proc_ide_read_media; | ||
265 | ent->write_proc = NULL; | ||
266 | sprintf(name,"ide0/%s", dev_name); | ||
267 | proc_symlink(dev_name, proc_ide_root, name); | ||
268 | } | ||
269 | |||
270 | static int fake_ide_setup(char *str) | ||
271 | { | ||
272 | fake_ide = 1; | ||
273 | return(1); | ||
274 | } | ||
275 | |||
276 | __setup("fake_ide", fake_ide_setup); | ||
277 | |||
278 | __uml_help(fake_ide_setup, | ||
279 | "fake_ide\n" | ||
280 | " Create ide0 entries that map onto ubd devices.\n\n" | ||
281 | ); | ||
282 | |||
/*
 * Parse a unit specifier at *ptr: either a number (as accepted by
 * simple_strtoul() with base 0) or a single letter 'a'..'h' standing for
 * 0..7.  On success *ptr is advanced past the specifier and the unit
 * number is returned; otherwise -1 is returned and *ptr is unchanged.
 */
static int parse_unit(char **ptr)
{
	char *cursor = *ptr;
	char *end;
	int unit;

	if (isdigit(*cursor)) {
		unit = simple_strtoul(cursor, &end, 0);
		if (end == cursor)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cursor < 'a') || (*cursor > 'h'))
		return -1;

	*ptr = cursor + 1;
	return *cursor - 'a';
}
301 | |||
302 | static int ubd_setup_common(char *str, int *index_out) | ||
303 | { | ||
304 | struct ubd *dev; | ||
305 | struct openflags flags = global_openflags; | ||
306 | char *backing_file; | ||
307 | int n, err, i; | ||
308 | |||
309 | if(index_out) *index_out = -1; | ||
310 | n = *str; | ||
311 | if(n == '='){ | ||
312 | char *end; | ||
313 | int major; | ||
314 | |||
315 | str++; | ||
316 | if(!strcmp(str, "mmap")){ | ||
317 | CHOOSE_MODE(printk("mmap not supported by the ubd " | ||
318 | "driver in tt mode\n"), | ||
319 | ubd_do_mmap = 1); | ||
320 | return(0); | ||
321 | } | ||
322 | |||
323 | if(!strcmp(str, "sync")){ | ||
324 | global_openflags = of_sync(global_openflags); | ||
325 | return(0); | ||
326 | } | ||
327 | major = simple_strtoul(str, &end, 0); | ||
328 | if((*end != '\0') || (end == str)){ | ||
329 | printk(KERN_ERR | ||
330 | "ubd_setup : didn't parse major number\n"); | ||
331 | return(1); | ||
332 | } | ||
333 | |||
334 | err = 1; | ||
335 | spin_lock(&ubd_lock); | ||
336 | if(fake_major != MAJOR_NR){ | ||
337 | printk(KERN_ERR "Can't assign a fake major twice\n"); | ||
338 | goto out1; | ||
339 | } | ||
340 | |||
341 | fake_major = major; | ||
342 | |||
343 | printk(KERN_INFO "Setting extra ubd major number to %d\n", | ||
344 | major); | ||
345 | err = 0; | ||
346 | out1: | ||
347 | spin_unlock(&ubd_lock); | ||
348 | return(err); | ||
349 | } | ||
350 | |||
351 | n = parse_unit(&str); | ||
352 | if(n < 0){ | ||
353 | printk(KERN_ERR "ubd_setup : couldn't parse unit number " | ||
354 | "'%s'\n", str); | ||
355 | return(1); | ||
356 | } | ||
357 | if(n >= MAX_DEV){ | ||
358 | printk(KERN_ERR "ubd_setup : index %d out of range " | ||
359 | "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1); | ||
360 | return(1); | ||
361 | } | ||
362 | |||
363 | err = 1; | ||
364 | spin_lock(&ubd_lock); | ||
365 | |||
366 | dev = &ubd_dev[n]; | ||
367 | if(dev->file != NULL){ | ||
368 | printk(KERN_ERR "ubd_setup : device already configured\n"); | ||
369 | goto out; | ||
370 | } | ||
371 | |||
372 | if (index_out) | ||
373 | *index_out = n; | ||
374 | |||
375 | for (i = 0; i < 4; i++) { | ||
376 | switch (*str) { | ||
377 | case 'r': | ||
378 | flags.w = 0; | ||
379 | break; | ||
380 | case 's': | ||
381 | flags.s = 1; | ||
382 | break; | ||
383 | case 'd': | ||
384 | dev->no_cow = 1; | ||
385 | break; | ||
386 | case '=': | ||
387 | str++; | ||
388 | goto break_loop; | ||
389 | default: | ||
390 | printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n"); | ||
391 | goto out; | ||
392 | } | ||
393 | str++; | ||
394 | } | ||
395 | |||
396 | if (*str == '=') | ||
397 | printk(KERN_ERR "ubd_setup : Too many flags specified\n"); | ||
398 | else | ||
399 | printk(KERN_ERR "ubd_setup : Expected '='\n"); | ||
400 | goto out; | ||
401 | |||
402 | break_loop: | ||
403 | err = 0; | ||
404 | backing_file = strchr(str, ','); | ||
405 | |||
406 | if (!backing_file) { | ||
407 | backing_file = strchr(str, ':'); | ||
408 | } | ||
409 | |||
410 | if(backing_file){ | ||
411 | if(dev->no_cow) | ||
412 | printk(KERN_ERR "Can't specify both 'd' and a " | ||
413 | "cow file\n"); | ||
414 | else { | ||
415 | *backing_file = '\0'; | ||
416 | backing_file++; | ||
417 | } | ||
418 | } | ||
419 | dev->file = str; | ||
420 | dev->cow.file = backing_file; | ||
421 | dev->boot_openflags = flags; | ||
422 | out: | ||
423 | spin_unlock(&ubd_lock); | ||
424 | return(err); | ||
425 | } | ||
426 | |||
427 | static int ubd_setup(char *str) | ||
428 | { | ||
429 | ubd_setup_common(str, NULL); | ||
430 | return(1); | ||
431 | } | ||
432 | |||
433 | __setup("ubd", ubd_setup); | ||
434 | __uml_help(ubd_setup, | ||
435 | "ubd<n><flags>=<filename>[(:|,)<filename2>]\n" | ||
436 | " This is used to associate a device with a file in the underlying\n" | ||
437 | " filesystem. When specifying two filenames, the first one is the\n" | ||
438 | " COW name and the second is the backing file name. As separator you can\n" | ||
439 | " use either a ':' or a ',': the first one allows writing things like;\n" | ||
440 | " ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" | ||
441 | " while with a ',' the shell would not expand the 2nd '~'.\n" | ||
442 | " When using only one filename, UML will detect whether to thread it like\n" | ||
443 | " a COW file or a backing file. To override this detection, add the 'd'\n" | ||
444 | " flag:\n" | ||
445 | " ubd0d=BackingFile\n" | ||
446 | " Usually, there is a filesystem in the file, but \n" | ||
447 | " that's not required. Swap devices containing swap files can be\n" | ||
448 | " specified like this. Also, a file which doesn't contain a\n" | ||
449 | " filesystem can have its contents read in the virtual \n" | ||
450 | " machine by running 'dd' on the device. <n> must be in the range\n" | ||
451 | " 0 to 7. Appending an 'r' to the number will cause that device\n" | ||
452 | " to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" | ||
453 | " an 's' will cause data to be written to disk on the host immediately.\n\n" | ||
454 | ); | ||
455 | |||
456 | static int udb_setup(char *str) | ||
457 | { | ||
458 | printk("udb%s specified on command line is almost certainly a ubd -> " | ||
459 | "udb TYPO\n", str); | ||
460 | return(1); | ||
461 | } | ||
462 | |||
463 | __setup("udb", udb_setup); | ||
464 | __uml_help(udb_setup, | ||
465 | "udb\n" | ||
466 | " This option is here solely to catch ubd -> udb typos, which can be\n\n" | ||
467 | " to impossible to catch visually unless you specifically look for\n\n" | ||
468 | " them. The only result of any option starting with 'udb' is an error\n\n" | ||
469 | " in the boot output.\n\n" | ||
470 | ); | ||
471 | |||
472 | static int fakehd_set = 0; | ||
473 | static int fakehd(char *str) | ||
474 | { | ||
475 | printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); | ||
476 | fakehd_set = 1; | ||
477 | return 1; | ||
478 | } | ||
479 | |||
480 | __setup("fakehd", fakehd); | ||
481 | __uml_help(fakehd, | ||
482 | "fakehd\n" | ||
483 | " Change the ubd device name to \"hd\".\n\n" | ||
484 | ); | ||
485 | |||
486 | static void do_ubd_request(request_queue_t * q); | ||
487 | |||
488 | /* Only changed by ubd_init, which is an initcall. */ | ||
489 | int thread_fd = -1; | ||
490 | |||
491 | /* Changed by ubd_handler, which is serialized because interrupts only | ||
492 | * happen on CPU 0. | ||
493 | */ | ||
494 | int intr_count = 0; | ||
495 | |||
496 | /* call ubd_finish if you need to serialize */ | ||
497 | static void __ubd_finish(struct request *req, int error) | ||
498 | { | ||
499 | int nsect; | ||
500 | |||
501 | if(error){ | ||
502 | end_request(req, 0); | ||
503 | return; | ||
504 | } | ||
505 | nsect = req->current_nr_sectors; | ||
506 | req->sector += nsect; | ||
507 | req->buffer += nsect << 9; | ||
508 | req->errors = 0; | ||
509 | req->nr_sectors -= nsect; | ||
510 | req->current_nr_sectors = 0; | ||
511 | end_request(req, 1); | ||
512 | } | ||
513 | |||
514 | static inline void ubd_finish(struct request *req, int error) | ||
515 | { | ||
516 | spin_lock(&ubd_io_lock); | ||
517 | __ubd_finish(req, error); | ||
518 | spin_unlock(&ubd_io_lock); | ||
519 | } | ||
520 | |||
521 | /* Called without ubd_io_lock held */ | ||
522 | static void ubd_handler(void) | ||
523 | { | ||
524 | struct io_thread_req req; | ||
525 | struct request *rq = elv_next_request(ubd_queue); | ||
526 | int n, err; | ||
527 | |||
528 | do_ubd = NULL; | ||
529 | intr_count++; | ||
530 | n = os_read_file(thread_fd, &req, sizeof(req)); | ||
531 | if(n != sizeof(req)){ | ||
532 | printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " | ||
533 | "err = %d\n", os_getpid(), -n); | ||
534 | spin_lock(&ubd_io_lock); | ||
535 | end_request(rq, 0); | ||
536 | spin_unlock(&ubd_io_lock); | ||
537 | return; | ||
538 | } | ||
539 | |||
540 | if((req.op != UBD_MMAP) && | ||
541 | ((req.offset != ((__u64) (rq->sector)) << 9) || | ||
542 | (req.length != (rq->current_nr_sectors) << 9))) | ||
543 | panic("I/O op mismatch"); | ||
544 | |||
545 | if(req.map_fd != -1){ | ||
546 | err = physmem_subst_mapping(req.buffer, req.map_fd, | ||
547 | req.map_offset, 1); | ||
548 | if(err) | ||
549 | printk("ubd_handler - physmem_subst_mapping failed, " | ||
550 | "err = %d\n", -err); | ||
551 | } | ||
552 | |||
553 | ubd_finish(rq, req.error); | ||
554 | reactivate_fd(thread_fd, UBD_IRQ); | ||
555 | do_ubd_request(ubd_queue); | ||
556 | } | ||
557 | |||
558 | static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) | ||
559 | { | ||
560 | ubd_handler(); | ||
561 | return(IRQ_HANDLED); | ||
562 | } | ||
563 | |||
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
574 | |||
575 | static int ubd_file_size(struct ubd *dev, __u64 *size_out) | ||
576 | { | ||
577 | char *file; | ||
578 | |||
579 | file = dev->cow.file ? dev->cow.file : dev->file; | ||
580 | return(os_file_size(file, size_out)); | ||
581 | } | ||
582 | |||
583 | static void ubd_close(struct ubd *dev) | ||
584 | { | ||
585 | if(ubd_do_mmap) | ||
586 | physmem_forget_descriptor(dev->fd); | ||
587 | os_close_file(dev->fd); | ||
588 | if(dev->cow.file == NULL) | ||
589 | return; | ||
590 | |||
591 | if(ubd_do_mmap) | ||
592 | physmem_forget_descriptor(dev->cow.fd); | ||
593 | os_close_file(dev->cow.fd); | ||
594 | vfree(dev->cow.bitmap); | ||
595 | dev->cow.bitmap = NULL; | ||
596 | } | ||
597 | |||
598 | static int ubd_open_dev(struct ubd *dev) | ||
599 | { | ||
600 | struct openflags flags; | ||
601 | char **back_ptr; | ||
602 | int err, create_cow, *create_ptr; | ||
603 | |||
604 | dev->openflags = dev->boot_openflags; | ||
605 | create_cow = 0; | ||
606 | create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; | ||
607 | back_ptr = dev->no_cow ? NULL : &dev->cow.file; | ||
608 | dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, | ||
609 | &dev->cow.bitmap_offset, &dev->cow.bitmap_len, | ||
610 | &dev->cow.data_offset, create_ptr); | ||
611 | |||
612 | if((dev->fd == -ENOENT) && create_cow){ | ||
613 | dev->fd = create_cow_file(dev->file, dev->cow.file, | ||
614 | dev->openflags, 1 << 9, PAGE_SIZE, | ||
615 | &dev->cow.bitmap_offset, | ||
616 | &dev->cow.bitmap_len, | ||
617 | &dev->cow.data_offset); | ||
618 | if(dev->fd >= 0){ | ||
619 | printk(KERN_INFO "Creating \"%s\" as COW file for " | ||
620 | "\"%s\"\n", dev->file, dev->cow.file); | ||
621 | } | ||
622 | } | ||
623 | |||
624 | if(dev->fd < 0){ | ||
625 | printk("Failed to open '%s', errno = %d\n", dev->file, | ||
626 | -dev->fd); | ||
627 | return(dev->fd); | ||
628 | } | ||
629 | |||
630 | if(dev->cow.file != NULL){ | ||
631 | err = -ENOMEM; | ||
632 | dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); | ||
633 | if(dev->cow.bitmap == NULL){ | ||
634 | printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); | ||
635 | goto error; | ||
636 | } | ||
637 | flush_tlb_kernel_vm(); | ||
638 | |||
639 | err = read_cow_bitmap(dev->fd, dev->cow.bitmap, | ||
640 | dev->cow.bitmap_offset, | ||
641 | dev->cow.bitmap_len); | ||
642 | if(err < 0) | ||
643 | goto error; | ||
644 | |||
645 | flags = dev->openflags; | ||
646 | flags.w = 0; | ||
647 | err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, | ||
648 | NULL, NULL); | ||
649 | if(err < 0) goto error; | ||
650 | dev->cow.fd = err; | ||
651 | } | ||
652 | return(0); | ||
653 | error: | ||
654 | os_close_file(dev->fd); | ||
655 | return(err); | ||
656 | } | ||
657 | |||
658 | static int ubd_new_disk(int major, u64 size, int unit, | ||
659 | struct gendisk **disk_out) | ||
660 | |||
661 | { | ||
662 | struct gendisk *disk; | ||
663 | char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")]; | ||
664 | int err; | ||
665 | |||
666 | disk = alloc_disk(1 << UBD_SHIFT); | ||
667 | if(disk == NULL) | ||
668 | return(-ENOMEM); | ||
669 | |||
670 | disk->major = major; | ||
671 | disk->first_minor = unit << UBD_SHIFT; | ||
672 | disk->fops = &ubd_blops; | ||
673 | set_capacity(disk, size / 512); | ||
674 | if(major == MAJOR_NR){ | ||
675 | sprintf(disk->disk_name, "ubd%c", 'a' + unit); | ||
676 | sprintf(disk->devfs_name, "ubd/disc%d", unit); | ||
677 | sprintf(from, "ubd/%d", unit); | ||
678 | sprintf(to, "disc%d/disc", unit); | ||
679 | err = devfs_mk_symlink(from, to); | ||
680 | if(err) | ||
681 | printk("ubd_new_disk failed to make link from %s to " | ||
682 | "%s, error = %d\n", from, to, err); | ||
683 | } | ||
684 | else { | ||
685 | sprintf(disk->disk_name, "ubd_fake%d", unit); | ||
686 | sprintf(disk->devfs_name, "ubd_fake/disc%d", unit); | ||
687 | } | ||
688 | |||
689 | /* sysfs register (not for ide fake devices) */ | ||
690 | if (major == MAJOR_NR) { | ||
691 | ubd_dev[unit].pdev.id = unit; | ||
692 | ubd_dev[unit].pdev.name = DRIVER_NAME; | ||
693 | platform_device_register(&ubd_dev[unit].pdev); | ||
694 | disk->driverfs_dev = &ubd_dev[unit].pdev.dev; | ||
695 | } | ||
696 | |||
697 | disk->private_data = &ubd_dev[unit]; | ||
698 | disk->queue = ubd_queue; | ||
699 | add_disk(disk); | ||
700 | |||
701 | *disk_out = disk; | ||
702 | return 0; | ||
703 | } | ||
704 | |||
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so that any expression can be passed, and the mask is
 * built with ~ rather than by left-shifting a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
706 | |||
707 | static int ubd_add(int n) | ||
708 | { | ||
709 | struct ubd *dev = &ubd_dev[n]; | ||
710 | int err; | ||
711 | |||
712 | if(dev->file == NULL) | ||
713 | return(-ENODEV); | ||
714 | |||
715 | if (ubd_open_dev(dev)) | ||
716 | return(-ENODEV); | ||
717 | |||
718 | err = ubd_file_size(dev, &dev->size); | ||
719 | if(err < 0) | ||
720 | return(err); | ||
721 | |||
722 | dev->size = ROUND_BLOCK(dev->size); | ||
723 | |||
724 | err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); | ||
725 | if(err) | ||
726 | return(err); | ||
727 | |||
728 | if(fake_major != MAJOR_NR) | ||
729 | ubd_new_disk(fake_major, dev->size, n, | ||
730 | &fake_gendisk[n]); | ||
731 | |||
732 | /* perhaps this should also be under the "if (fake_major)" above */ | ||
733 | /* using the fake_disk->disk_name and also the fakehd_set name */ | ||
734 | if (fake_ide) | ||
735 | make_ide_entries(ubd_gendisk[n]->disk_name); | ||
736 | |||
737 | ubd_close(dev); | ||
738 | return 0; | ||
739 | } | ||
740 | |||
741 | static int ubd_config(char *str) | ||
742 | { | ||
743 | int n, err; | ||
744 | |||
745 | str = uml_strdup(str); | ||
746 | if(str == NULL){ | ||
747 | printk(KERN_ERR "ubd_config failed to strdup string\n"); | ||
748 | return(1); | ||
749 | } | ||
750 | err = ubd_setup_common(str, &n); | ||
751 | if(err){ | ||
752 | kfree(str); | ||
753 | return(-1); | ||
754 | } | ||
755 | if(n == -1) return(0); | ||
756 | |||
757 | spin_lock(&ubd_lock); | ||
758 | err = ubd_add(n); | ||
759 | if(err) | ||
760 | ubd_dev[n].file = NULL; | ||
761 | spin_unlock(&ubd_lock); | ||
762 | |||
763 | return(err); | ||
764 | } | ||
765 | |||
766 | static int ubd_get_config(char *name, char *str, int size, char **error_out) | ||
767 | { | ||
768 | struct ubd *dev; | ||
769 | int n, len = 0; | ||
770 | |||
771 | n = parse_unit(&name); | ||
772 | if((n >= MAX_DEV) || (n < 0)){ | ||
773 | *error_out = "ubd_get_config : device number out of range"; | ||
774 | return(-1); | ||
775 | } | ||
776 | |||
777 | dev = &ubd_dev[n]; | ||
778 | spin_lock(&ubd_lock); | ||
779 | |||
780 | if(dev->file == NULL){ | ||
781 | CONFIG_CHUNK(str, size, len, "", 1); | ||
782 | goto out; | ||
783 | } | ||
784 | |||
785 | CONFIG_CHUNK(str, size, len, dev->file, 0); | ||
786 | |||
787 | if(dev->cow.file != NULL){ | ||
788 | CONFIG_CHUNK(str, size, len, ",", 0); | ||
789 | CONFIG_CHUNK(str, size, len, dev->cow.file, 1); | ||
790 | } | ||
791 | else CONFIG_CHUNK(str, size, len, "", 1); | ||
792 | |||
793 | out: | ||
794 | spin_unlock(&ubd_lock); | ||
795 | return(len); | ||
796 | } | ||
797 | |||
798 | static int ubd_remove(char *str) | ||
799 | { | ||
800 | struct ubd *dev; | ||
801 | int n, err = -ENODEV; | ||
802 | |||
803 | n = parse_unit(&str); | ||
804 | |||
805 | if((n < 0) || (n >= MAX_DEV)) | ||
806 | return(err); | ||
807 | |||
808 | dev = &ubd_dev[n]; | ||
809 | if(dev->count > 0) | ||
810 | return(-EBUSY); /* you cannot remove a open disk */ | ||
811 | |||
812 | err = 0; | ||
813 | spin_lock(&ubd_lock); | ||
814 | |||
815 | if(ubd_gendisk[n] == NULL) | ||
816 | goto out; | ||
817 | |||
818 | del_gendisk(ubd_gendisk[n]); | ||
819 | put_disk(ubd_gendisk[n]); | ||
820 | ubd_gendisk[n] = NULL; | ||
821 | |||
822 | if(fake_gendisk[n] != NULL){ | ||
823 | del_gendisk(fake_gendisk[n]); | ||
824 | put_disk(fake_gendisk[n]); | ||
825 | fake_gendisk[n] = NULL; | ||
826 | } | ||
827 | |||
828 | platform_device_unregister(&dev->pdev); | ||
829 | *dev = ((struct ubd) DEFAULT_UBD); | ||
830 | err = 0; | ||
831 | out: | ||
832 | spin_unlock(&ubd_lock); | ||
833 | return(err); | ||
834 | } | ||
835 | |||
836 | static struct mc_device ubd_mc = { | ||
837 | .name = "ubd", | ||
838 | .config = ubd_config, | ||
839 | .get_config = ubd_get_config, | ||
840 | .remove = ubd_remove, | ||
841 | }; | ||
842 | |||
843 | static int ubd_mc_init(void) | ||
844 | { | ||
845 | mconsole_register_dev(&ubd_mc); | ||
846 | return 0; | ||
847 | } | ||
848 | |||
849 | __initcall(ubd_mc_init); | ||
850 | |||
851 | static struct device_driver ubd_driver = { | ||
852 | .name = DRIVER_NAME, | ||
853 | .bus = &platform_bus_type, | ||
854 | }; | ||
855 | |||
856 | int ubd_init(void) | ||
857 | { | ||
858 | int i; | ||
859 | |||
860 | devfs_mk_dir("ubd"); | ||
861 | if (register_blkdev(MAJOR_NR, "ubd")) | ||
862 | return -1; | ||
863 | |||
864 | ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock); | ||
865 | if (!ubd_queue) { | ||
866 | unregister_blkdev(MAJOR_NR, "ubd"); | ||
867 | return -1; | ||
868 | } | ||
869 | |||
870 | if (fake_major != MAJOR_NR) { | ||
871 | char name[sizeof("ubd_nnn\0")]; | ||
872 | |||
873 | snprintf(name, sizeof(name), "ubd_%d", fake_major); | ||
874 | devfs_mk_dir(name); | ||
875 | if (register_blkdev(fake_major, "ubd")) | ||
876 | return -1; | ||
877 | } | ||
878 | driver_register(&ubd_driver); | ||
879 | for (i = 0; i < MAX_DEV; i++) | ||
880 | ubd_add(i); | ||
881 | return 0; | ||
882 | } | ||
883 | |||
884 | late_initcall(ubd_init); | ||
885 | |||
886 | int ubd_driver_init(void){ | ||
887 | unsigned long stack; | ||
888 | int err; | ||
889 | |||
890 | /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ | ||
891 | if(global_openflags.s){ | ||
892 | printk(KERN_INFO "ubd: Synchronous mode\n"); | ||
893 | /* Letting ubd=sync be like using ubd#s= instead of ubd#= is | ||
894 | * enough. So use anyway the io thread. */ | ||
895 | } | ||
896 | stack = alloc_stack(0, 0); | ||
897 | io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), | ||
898 | &thread_fd); | ||
899 | if(io_pid < 0){ | ||
900 | printk(KERN_ERR | ||
901 | "ubd : Failed to start I/O thread (errno = %d) - " | ||
902 | "falling back to synchronous I/O\n", -io_pid); | ||
903 | io_pid = -1; | ||
904 | return(0); | ||
905 | } | ||
906 | err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, | ||
907 | SA_INTERRUPT, "ubd", ubd_dev); | ||
908 | if(err != 0) | ||
909 | printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); | ||
910 | return(err); | ||
911 | } | ||
912 | |||
913 | device_initcall(ubd_driver_init); | ||
914 | |||
915 | static int ubd_open(struct inode *inode, struct file *filp) | ||
916 | { | ||
917 | struct gendisk *disk = inode->i_bdev->bd_disk; | ||
918 | struct ubd *dev = disk->private_data; | ||
919 | int err = 0; | ||
920 | |||
921 | if(dev->count == 0){ | ||
922 | err = ubd_open_dev(dev); | ||
923 | if(err){ | ||
924 | printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", | ||
925 | disk->disk_name, dev->file, -err); | ||
926 | goto out; | ||
927 | } | ||
928 | } | ||
929 | dev->count++; | ||
930 | if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){ | ||
931 | if(--dev->count == 0) ubd_close(dev); | ||
932 | err = -EROFS; | ||
933 | } | ||
934 | out: | ||
935 | return(err); | ||
936 | } | ||
937 | |||
938 | static int ubd_release(struct inode * inode, struct file * file) | ||
939 | { | ||
940 | struct gendisk *disk = inode->i_bdev->bd_disk; | ||
941 | struct ubd *dev = disk->private_data; | ||
942 | |||
943 | if(--dev->count == 0) | ||
944 | ubd_close(dev); | ||
945 | return(0); | ||
946 | } | ||
947 | |||
/*
 * Mark the sectors covered by a write in the COW bitmap.
 *
 * io_offset and length are in bytes.  For each 512-byte sector of the
 * range the matching bit is set in cow_mask (if non-NULL, numbered from
 * the start of the range) and in bitmap (numbered from the start of the
 * device).
 *
 * If any bitmap bit was newly set, bitmap_words receives the two bitmap
 * words starting at the one containing the first sector, and *cow_offset
 * the byte position of those words, offset by bitmap_offset.  Otherwise
 * neither is touched.
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	unsigned char *bits = (unsigned char *) bitmap;
	__u64 first = io_offset >> 9;
	int nsectors = length >> 9;
	int i, dirty = 0;

	for (i = 0; i < nsectors; i++) {
		if (cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);

		if (!ubd_test_bit(first + i, bits)) {
			dirty = 1;
			ubd_set_bit(first + i, bits);
		}
	}

	if (!dirty)
		return;

	*cow_offset = first / (sizeof(unsigned long) * 8);

	/* This takes care of the case where we're exactly at the end of the
	 * device, and *cow_offset + 1 is off the end.  So, just back it up
	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
	 * for the original diagnosis.
	 */
	if (*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
			    sizeof(unsigned long) - 1))
		(*cow_offset)--;

	bitmap_words[0] = bitmap[*cow_offset];
	bitmap_words[1] = bitmap[*cow_offset + 1];

	/* Convert the word index into a byte position. */
	*cow_offset *= sizeof(unsigned long);
	*cow_offset += bitmap_offset;
}
986 | |||
987 | static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, | ||
988 | __u64 bitmap_offset, __u64 bitmap_len) | ||
989 | { | ||
990 | __u64 sector = req->offset >> 9; | ||
991 | int i; | ||
992 | |||
993 | if(req->length > (sizeof(req->sector_mask) * 8) << 9) | ||
994 | panic("Operation too long"); | ||
995 | |||
996 | if(req->op == UBD_READ) { | ||
997 | for(i = 0; i < req->length >> 9; i++){ | ||
998 | if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) | ||
999 | ubd_set_bit(i, (unsigned char *) | ||
1000 | &req->sector_mask); | ||
1001 | } | ||
1002 | } | ||
1003 | else cowify_bitmap(req->offset, req->length, &req->sector_mask, | ||
1004 | &req->cow_offset, bitmap, bitmap_offset, | ||
1005 | req->bitmap_words, bitmap_len); | ||
1006 | } | ||
1007 | |||
1008 | static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) | ||
1009 | { | ||
1010 | __u64 sector; | ||
1011 | unsigned char *bitmap; | ||
1012 | int bit, i; | ||
1013 | |||
1014 | /* mmap must have been requested on the command line */ | ||
1015 | if(!ubd_do_mmap) | ||
1016 | return(-1); | ||
1017 | |||
1018 | /* The buffer must be page aligned */ | ||
1019 | if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) | ||
1020 | return(-1); | ||
1021 | |||
1022 | /* The request must be a page long */ | ||
1023 | if((req->current_nr_sectors << 9) != PAGE_SIZE) | ||
1024 | return(-1); | ||
1025 | |||
1026 | if(dev->cow.file == NULL) | ||
1027 | return(dev->fd); | ||
1028 | |||
1029 | sector = offset >> 9; | ||
1030 | bitmap = (unsigned char *) dev->cow.bitmap; | ||
1031 | bit = ubd_test_bit(sector, bitmap); | ||
1032 | |||
1033 | for(i = 1; i < req->current_nr_sectors; i++){ | ||
1034 | if(ubd_test_bit(sector + i, bitmap) != bit) | ||
1035 | return(-1); | ||
1036 | } | ||
1037 | |||
1038 | if(bit || (rq_data_dir(req) == WRITE)) | ||
1039 | offset += dev->cow.data_offset; | ||
1040 | |||
1041 | /* The data on disk must be page aligned */ | ||
1042 | if((offset % UBD_MMAP_BLOCK_SIZE) != 0) | ||
1043 | return(-1); | ||
1044 | |||
1045 | return(bit ? dev->fd : dev->cow.fd); | ||
1046 | } | ||
1047 | |||
1048 | static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, | ||
1049 | struct request *req, | ||
1050 | struct io_thread_req *io_req) | ||
1051 | { | ||
1052 | int err; | ||
1053 | |||
1054 | if(rq_data_dir(req) == WRITE){ | ||
1055 | /* Writes are almost no-ops since the new data is already in the | ||
1056 | * host page cache | ||
1057 | */ | ||
1058 | dev->map_writes++; | ||
1059 | if(dev->cow.file != NULL) | ||
1060 | cowify_bitmap(io_req->offset, io_req->length, | ||
1061 | &io_req->sector_mask, &io_req->cow_offset, | ||
1062 | dev->cow.bitmap, dev->cow.bitmap_offset, | ||
1063 | io_req->bitmap_words, | ||
1064 | dev->cow.bitmap_len); | ||
1065 | } | ||
1066 | else { | ||
1067 | int w; | ||
1068 | |||
1069 | if((dev->cow.file != NULL) && (fd == dev->cow.fd)) | ||
1070 | w = 0; | ||
1071 | else w = dev->openflags.w; | ||
1072 | |||
1073 | if((dev->cow.file != NULL) && (fd == dev->fd)) | ||
1074 | offset += dev->cow.data_offset; | ||
1075 | |||
1076 | err = physmem_subst_mapping(req->buffer, fd, offset, w); | ||
1077 | if(err){ | ||
1078 | printk("physmem_subst_mapping failed, err = %d\n", | ||
1079 | -err); | ||
1080 | return(1); | ||
1081 | } | ||
1082 | dev->map_reads++; | ||
1083 | } | ||
1084 | io_req->op = UBD_MMAP; | ||
1085 | io_req->buffer = req->buffer; | ||
1086 | return(0); | ||
1087 | } | ||
1088 | |||
1089 | /* Called with ubd_io_lock held */ | ||
1090 | static int prepare_request(struct request *req, struct io_thread_req *io_req) | ||
1091 | { | ||
1092 | struct gendisk *disk = req->rq_disk; | ||
1093 | struct ubd *dev = disk->private_data; | ||
1094 | __u64 offset; | ||
1095 | int len, fd; | ||
1096 | |||
1097 | if(req->rq_status == RQ_INACTIVE) return(1); | ||
1098 | |||
1099 | if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ | ||
1100 | printk("Write attempted on readonly ubd device %s\n", | ||
1101 | disk->disk_name); | ||
1102 | end_request(req, 0); | ||
1103 | return(1); | ||
1104 | } | ||
1105 | |||
1106 | offset = ((__u64) req->sector) << 9; | ||
1107 | len = req->current_nr_sectors << 9; | ||
1108 | |||
1109 | io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; | ||
1110 | io_req->fds[1] = dev->fd; | ||
1111 | io_req->map_fd = -1; | ||
1112 | io_req->cow_offset = -1; | ||
1113 | io_req->offset = offset; | ||
1114 | io_req->length = len; | ||
1115 | io_req->error = 0; | ||
1116 | io_req->sector_mask = 0; | ||
1117 | |||
1118 | fd = mmap_fd(req, dev, io_req->offset); | ||
1119 | if(fd > 0){ | ||
1120 | /* If mmapping is otherwise OK, but the first access to the | ||
1121 | * page is a write, then it's not mapped in yet. So we have | ||
1122 | * to write the data to disk first, then we can map the disk | ||
1123 | * page in and continue normally from there. | ||
1124 | */ | ||
1125 | if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){ | ||
1126 | io_req->map_fd = dev->fd; | ||
1127 | io_req->map_offset = io_req->offset + | ||
1128 | dev->cow.data_offset; | ||
1129 | dev->write_maps++; | ||
1130 | } | ||
1131 | else return(prepare_mmap_request(dev, fd, io_req->offset, req, | ||
1132 | io_req)); | ||
1133 | } | ||
1134 | |||
1135 | if(rq_data_dir(req) == READ) | ||
1136 | dev->nomap_reads++; | ||
1137 | else dev->nomap_writes++; | ||
1138 | |||
1139 | io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; | ||
1140 | io_req->offsets[0] = 0; | ||
1141 | io_req->offsets[1] = dev->cow.data_offset; | ||
1142 | io_req->buffer = req->buffer; | ||
1143 | io_req->sectorsize = 1 << 9; | ||
1144 | |||
1145 | if(dev->cow.file != NULL) | ||
1146 | cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, | ||
1147 | dev->cow.bitmap_len); | ||
1148 | |||
1149 | return(0); | ||
1150 | } | ||
1151 | |||
1152 | /* Called with ubd_io_lock held */ | ||
1153 | static void do_ubd_request(request_queue_t *q) | ||
1154 | { | ||
1155 | struct io_thread_req io_req; | ||
1156 | struct request *req; | ||
1157 | int err, n; | ||
1158 | |||
1159 | if(thread_fd == -1){ | ||
1160 | while((req = elv_next_request(q)) != NULL){ | ||
1161 | err = prepare_request(req, &io_req); | ||
1162 | if(!err){ | ||
1163 | do_io(&io_req); | ||
1164 | __ubd_finish(req, io_req.error); | ||
1165 | } | ||
1166 | } | ||
1167 | } | ||
1168 | else { | ||
1169 | if(do_ubd || (req = elv_next_request(q)) == NULL) | ||
1170 | return; | ||
1171 | err = prepare_request(req, &io_req); | ||
1172 | if(!err){ | ||
1173 | do_ubd = ubd_handler; | ||
1174 | n = os_write_file(thread_fd, (char *) &io_req, | ||
1175 | sizeof(io_req)); | ||
1176 | if(n != sizeof(io_req)) | ||
1177 | printk("write to io thread failed, " | ||
1178 | "errno = %d\n", -n); | ||
1179 | } | ||
1180 | } | ||
1181 | } | ||
1182 | |||
1183 | static int ubd_ioctl(struct inode * inode, struct file * file, | ||
1184 | unsigned int cmd, unsigned long arg) | ||
1185 | { | ||
1186 | struct hd_geometry __user *loc = (struct hd_geometry __user *) arg; | ||
1187 | struct ubd *dev = inode->i_bdev->bd_disk->private_data; | ||
1188 | struct hd_driveid ubd_id = { | ||
1189 | .cyls = 0, | ||
1190 | .heads = 128, | ||
1191 | .sectors = 32, | ||
1192 | }; | ||
1193 | |||
1194 | switch (cmd) { | ||
1195 | struct hd_geometry g; | ||
1196 | struct cdrom_volctrl volume; | ||
1197 | case HDIO_GETGEO: | ||
1198 | if(!loc) return(-EINVAL); | ||
1199 | g.heads = 128; | ||
1200 | g.sectors = 32; | ||
1201 | g.cylinders = dev->size / (128 * 32 * 512); | ||
1202 | g.start = get_start_sect(inode->i_bdev); | ||
1203 | return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); | ||
1204 | |||
1205 | case HDIO_GET_IDENTITY: | ||
1206 | ubd_id.cyls = dev->size / (128 * 32 * 512); | ||
1207 | if(copy_to_user((char __user *) arg, (char *) &ubd_id, | ||
1208 | sizeof(ubd_id))) | ||
1209 | return(-EFAULT); | ||
1210 | return(0); | ||
1211 | |||
1212 | case CDROMVOLREAD: | ||
1213 | if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) | ||
1214 | return(-EFAULT); | ||
1215 | volume.channel0 = 255; | ||
1216 | volume.channel1 = 255; | ||
1217 | volume.channel2 = 255; | ||
1218 | volume.channel3 = 255; | ||
1219 | if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) | ||
1220 | return(-EFAULT); | ||
1221 | return(0); | ||
1222 | } | ||
1223 | return(-EINVAL); | ||
1224 | } | ||
1225 | |||
1226 | static int ubd_check_remapped(int fd, unsigned long address, int is_write, | ||
1227 | __u64 offset) | ||
1228 | { | ||
1229 | __u64 bitmap_offset; | ||
1230 | unsigned long new_bitmap[2]; | ||
1231 | int i, err, n; | ||
1232 | |||
1233 | /* If it's not a write access, we can't do anything about it */ | ||
1234 | if(!is_write) | ||
1235 | return(0); | ||
1236 | |||
1237 | /* We have a write */ | ||
1238 | for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ | ||
1239 | struct ubd *dev = &ubd_dev[i]; | ||
1240 | |||
1241 | if((dev->fd != fd) && (dev->cow.fd != fd)) | ||
1242 | continue; | ||
1243 | |||
1244 | /* It's a write to a ubd device */ | ||
1245 | |||
1246 | if(!dev->openflags.w){ | ||
1247 | /* It's a write access on a read-only device - probably | ||
1248 | * shouldn't happen. If the kernel is trying to change | ||
1249 | * something with no intention of writing it back out, | ||
1250 | * then this message will clue us in that this needs | ||
1251 | * fixing | ||
1252 | */ | ||
1253 | printk("Write access to mapped page from readonly ubd " | ||
1254 | "device %d\n", i); | ||
1255 | return(0); | ||
1256 | } | ||
1257 | |||
1258 | /* It's a write to a writeable ubd device - it must be COWed | ||
1259 | * because, otherwise, the page would have been mapped in | ||
1260 | * writeable | ||
1261 | */ | ||
1262 | |||
1263 | if(!dev->cow.file) | ||
1264 | panic("Write fault on writeable non-COW ubd device %d", | ||
1265 | i); | ||
1266 | |||
1267 | /* It should also be an access to the backing file since the | ||
1268 | * COW pages should be mapped in read-write | ||
1269 | */ | ||
1270 | |||
1271 | if(fd == dev->fd) | ||
1272 | panic("Write fault on a backing page of ubd " | ||
1273 | "device %d\n", i); | ||
1274 | |||
1275 | /* So, we do the write, copying the backing data to the COW | ||
1276 | * file... | ||
1277 | */ | ||
1278 | |||
1279 | err = os_seek_file(dev->fd, offset + dev->cow.data_offset); | ||
1280 | if(err < 0) | ||
1281 | panic("Couldn't seek to %lld in COW file of ubd " | ||
1282 | "device %d, err = %d", | ||
1283 | offset + dev->cow.data_offset, i, -err); | ||
1284 | |||
1285 | n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); | ||
1286 | if(n != PAGE_SIZE) | ||
1287 | panic("Couldn't copy data to COW file of ubd " | ||
1288 | "device %d, err = %d", i, -n); | ||
1289 | |||
1290 | /* ... updating the COW bitmap... */ | ||
1291 | |||
1292 | cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, | ||
1293 | dev->cow.bitmap, dev->cow.bitmap_offset, | ||
1294 | new_bitmap, dev->cow.bitmap_len); | ||
1295 | |||
1296 | err = os_seek_file(dev->fd, bitmap_offset); | ||
1297 | if(err < 0) | ||
1298 | panic("Couldn't seek to %lld in COW file of ubd " | ||
1299 | "device %d, err = %d", bitmap_offset, i, -err); | ||
1300 | |||
1301 | n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); | ||
1302 | if(n != sizeof(new_bitmap)) | ||
1303 | panic("Couldn't update bitmap of ubd device %d, " | ||
1304 | "err = %d", i, -n); | ||
1305 | |||
1306 | /* Maybe we can map the COW page in, and maybe we can't. If | ||
1307 | * it is a pre-V3 COW file, we can't, since the alignment will | ||
1308 | * be wrong. If it is a V3 or later COW file which has been | ||
1309 | * moved to a system with a larger page size, then maybe we | ||
1310 | * can't, depending on the exact location of the page. | ||
1311 | */ | ||
1312 | |||
1313 | offset += dev->cow.data_offset; | ||
1314 | |||
1315 | /* Remove the remapping, putting the original anonymous page | ||
1316 | * back. If the COW file can be mapped in, that is done. | ||
1317 | * Otherwise, the COW page is read in. | ||
1318 | */ | ||
1319 | |||
1320 | if(!physmem_remove_mapping((void *) address)) | ||
1321 | panic("Address 0x%lx not remapped by ubd device %d", | ||
1322 | address, i); | ||
1323 | if((offset % UBD_MMAP_BLOCK_SIZE) == 0) | ||
1324 | physmem_subst_mapping((void *) address, dev->fd, | ||
1325 | offset, 1); | ||
1326 | else { | ||
1327 | err = os_seek_file(dev->fd, offset); | ||
1328 | if(err < 0) | ||
1329 | panic("Couldn't seek to %lld in COW file of " | ||
1330 | "ubd device %d, err = %d", offset, i, | ||
1331 | -err); | ||
1332 | |||
1333 | n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); | ||
1334 | if(n != PAGE_SIZE) | ||
1335 | panic("Failed to read page from offset %llx of " | ||
1336 | "COW file of ubd device %d, err = %d", | ||
1337 | offset, i, -n); | ||
1338 | } | ||
1339 | |||
1340 | return(1); | ||
1341 | } | ||
1342 | |||
1343 | /* It's not a write on a ubd device */ | ||
1344 | return(0); | ||
1345 | } | ||
1346 | |||
1347 | static struct remapper ubd_remapper = { | ||
1348 | .list = LIST_HEAD_INIT(ubd_remapper.list), | ||
1349 | .proc = ubd_check_remapped, | ||
1350 | }; | ||
1351 | |||
1352 | static int ubd_remapper_setup(void) | ||
1353 | { | ||
1354 | if(ubd_do_mmap) | ||
1355 | register_remapper(&ubd_remapper); | ||
1356 | |||
1357 | return(0); | ||
1358 | } | ||
1359 | |||
1360 | __initcall(ubd_remapper_setup); | ||
1361 | |||
1362 | static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) | ||
1363 | { | ||
1364 | struct uml_stat buf1, buf2; | ||
1365 | int err; | ||
1366 | |||
1367 | if(from_cmdline == NULL) return(1); | ||
1368 | if(!strcmp(from_cmdline, from_cow)) return(1); | ||
1369 | |||
1370 | err = os_stat_file(from_cmdline, &buf1); | ||
1371 | if(err < 0){ | ||
1372 | printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err); | ||
1373 | return(1); | ||
1374 | } | ||
1375 | err = os_stat_file(from_cow, &buf2); | ||
1376 | if(err < 0){ | ||
1377 | printk("Couldn't stat '%s', err = %d\n", from_cow, -err); | ||
1378 | return(1); | ||
1379 | } | ||
1380 | if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) | ||
1381 | return(1); | ||
1382 | |||
1383 | printk("Backing file mismatch - \"%s\" requested,\n" | ||
1384 | "\"%s\" specified in COW header of \"%s\"\n", | ||
1385 | from_cmdline, from_cow, cow); | ||
1386 | return(0); | ||
1387 | } | ||
1388 | |||
1389 | static int backing_file_mismatch(char *file, __u64 size, time_t mtime) | ||
1390 | { | ||
1391 | unsigned long modtime; | ||
1392 | long long actual; | ||
1393 | int err; | ||
1394 | |||
1395 | err = os_file_modtime(file, &modtime); | ||
1396 | if(err < 0){ | ||
1397 | printk("Failed to get modification time of backing file " | ||
1398 | "\"%s\", err = %d\n", file, -err); | ||
1399 | return(err); | ||
1400 | } | ||
1401 | |||
1402 | err = os_file_size(file, &actual); | ||
1403 | if(err < 0){ | ||
1404 | printk("Failed to get size of backing file \"%s\", " | ||
1405 | "err = %d\n", file, -err); | ||
1406 | return(err); | ||
1407 | } | ||
1408 | |||
1409 | if(actual != size){ | ||
1410 | /*__u64 can be a long on AMD64 and with %lu GCC complains; so | ||
1411 | * the typecast.*/ | ||
1412 | printk("Size mismatch (%llu vs %llu) of COW header vs backing " | ||
1413 | "file\n", (unsigned long long) size, actual); | ||
1414 | return(-EINVAL); | ||
1415 | } | ||
1416 | if(modtime != mtime){ | ||
1417 | printk("mtime mismatch (%ld vs %ld) of COW header vs backing " | ||
1418 | "file\n", mtime, modtime); | ||
1419 | return(-EINVAL); | ||
1420 | } | ||
1421 | return(0); | ||
1422 | } | ||
1423 | |||
/*
 * Read 'len' bytes at 'offset' of 'fd' into 'buf'.
 *
 * Returns 0 on success or the negative error from os_seek_file() or
 * os_read_file().  A non-negative result from os_read_file() counts as
 * success, whatever its value.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret;

	ret = os_seek_file(fd, offset);
	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1438 | |||
1439 | int open_ubd_file(char *file, struct openflags *openflags, | ||
1440 | char **backing_file_out, int *bitmap_offset_out, | ||
1441 | unsigned long *bitmap_len_out, int *data_offset_out, | ||
1442 | int *create_cow_out) | ||
1443 | { | ||
1444 | time_t mtime; | ||
1445 | unsigned long long size; | ||
1446 | __u32 version, align; | ||
1447 | char *backing_file; | ||
1448 | int fd, err, sectorsize, same, mode = 0644; | ||
1449 | |||
1450 | fd = os_open_file(file, *openflags, mode); | ||
1451 | if(fd < 0){ | ||
1452 | if((fd == -ENOENT) && (create_cow_out != NULL)) | ||
1453 | *create_cow_out = 1; | ||
1454 | if(!openflags->w || | ||
1455 | ((fd != -EROFS) && (fd != -EACCES))) return(fd); | ||
1456 | openflags->w = 0; | ||
1457 | fd = os_open_file(file, *openflags, mode); | ||
1458 | if(fd < 0) | ||
1459 | return(fd); | ||
1460 | } | ||
1461 | |||
1462 | err = os_lock_file(fd, openflags->w); | ||
1463 | if(err < 0){ | ||
1464 | printk("Failed to lock '%s', err = %d\n", file, -err); | ||
1465 | goto out_close; | ||
1466 | } | ||
1467 | |||
1468 | if(backing_file_out == NULL) return(fd); | ||
1469 | |||
1470 | err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, | ||
1471 | &size, §orsize, &align, bitmap_offset_out); | ||
1472 | if(err && (*backing_file_out != NULL)){ | ||
1473 | printk("Failed to read COW header from COW file \"%s\", " | ||
1474 | "errno = %d\n", file, -err); | ||
1475 | goto out_close; | ||
1476 | } | ||
1477 | if(err) return(fd); | ||
1478 | |||
1479 | if(backing_file_out == NULL) return(fd); | ||
1480 | |||
1481 | same = same_backing_files(*backing_file_out, backing_file, file); | ||
1482 | |||
1483 | if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ | ||
1484 | printk("Switching backing file to '%s'\n", *backing_file_out); | ||
1485 | err = write_cow_header(file, fd, *backing_file_out, | ||
1486 | sectorsize, align, &size); | ||
1487 | if(err){ | ||
1488 | printk("Switch failed, errno = %d\n", -err); | ||
1489 | return(err); | ||
1490 | } | ||
1491 | } | ||
1492 | else { | ||
1493 | *backing_file_out = backing_file; | ||
1494 | err = backing_file_mismatch(*backing_file_out, size, mtime); | ||
1495 | if(err) goto out_close; | ||
1496 | } | ||
1497 | |||
1498 | cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, | ||
1499 | bitmap_len_out, data_offset_out); | ||
1500 | |||
1501 | return(fd); | ||
1502 | out_close: | ||
1503 | os_close_file(fd); | ||
1504 | return(err); | ||
1505 | } | ||
1506 | |||
1507 | int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, | ||
1508 | int sectorsize, int alignment, int *bitmap_offset_out, | ||
1509 | unsigned long *bitmap_len_out, int *data_offset_out) | ||
1510 | { | ||
1511 | int err, fd; | ||
1512 | |||
1513 | flags.c = 1; | ||
1514 | fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); | ||
1515 | if(fd < 0){ | ||
1516 | err = fd; | ||
1517 | printk("Open of COW file '%s' failed, errno = %d\n", cow_file, | ||
1518 | -err); | ||
1519 | goto out; | ||
1520 | } | ||
1521 | |||
1522 | err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, | ||
1523 | bitmap_offset_out, bitmap_len_out, | ||
1524 | data_offset_out); | ||
1525 | if(!err) | ||
1526 | return(fd); | ||
1527 | os_close_file(fd); | ||
1528 | out: | ||
1529 | return(err); | ||
1530 | } | ||
1531 | |||
1532 | static int update_bitmap(struct io_thread_req *req) | ||
1533 | { | ||
1534 | int n; | ||
1535 | |||
1536 | if(req->cow_offset == -1) | ||
1537 | return(0); | ||
1538 | |||
1539 | n = os_seek_file(req->fds[1], req->cow_offset); | ||
1540 | if(n < 0){ | ||
1541 | printk("do_io - bitmap lseek failed : err = %d\n", -n); | ||
1542 | return(1); | ||
1543 | } | ||
1544 | |||
1545 | n = os_write_file(req->fds[1], &req->bitmap_words, | ||
1546 | sizeof(req->bitmap_words)); | ||
1547 | if(n != sizeof(req->bitmap_words)){ | ||
1548 | printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, | ||
1549 | req->fds[1]); | ||
1550 | return(1); | ||
1551 | } | ||
1552 | |||
1553 | return(0); | ||
1554 | } | ||
1555 | |||
1556 | void do_io(struct io_thread_req *req) | ||
1557 | { | ||
1558 | char *buf; | ||
1559 | unsigned long len; | ||
1560 | int n, nsectors, start, end, bit; | ||
1561 | int err; | ||
1562 | __u64 off; | ||
1563 | |||
1564 | if(req->op == UBD_MMAP){ | ||
1565 | /* Touch the page to force the host to do any necessary IO to | ||
1566 | * get it into memory | ||
1567 | */ | ||
1568 | n = *((volatile int *) req->buffer); | ||
1569 | req->error = update_bitmap(req); | ||
1570 | return; | ||
1571 | } | ||
1572 | |||
1573 | nsectors = req->length / req->sectorsize; | ||
1574 | start = 0; | ||
1575 | do { | ||
1576 | bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); | ||
1577 | end = start; | ||
1578 | while((end < nsectors) && | ||
1579 | (ubd_test_bit(end, (unsigned char *) | ||
1580 | &req->sector_mask) == bit)) | ||
1581 | end++; | ||
1582 | |||
1583 | off = req->offset + req->offsets[bit] + | ||
1584 | start * req->sectorsize; | ||
1585 | len = (end - start) * req->sectorsize; | ||
1586 | buf = &req->buffer[start * req->sectorsize]; | ||
1587 | |||
1588 | err = os_seek_file(req->fds[bit], off); | ||
1589 | if(err < 0){ | ||
1590 | printk("do_io - lseek failed : err = %d\n", -err); | ||
1591 | req->error = 1; | ||
1592 | return; | ||
1593 | } | ||
1594 | if(req->op == UBD_READ){ | ||
1595 | n = 0; | ||
1596 | do { | ||
1597 | buf = &buf[n]; | ||
1598 | len -= n; | ||
1599 | n = os_read_file(req->fds[bit], buf, len); | ||
1600 | if (n < 0) { | ||
1601 | printk("do_io - read failed, err = %d " | ||
1602 | "fd = %d\n", -n, req->fds[bit]); | ||
1603 | req->error = 1; | ||
1604 | return; | ||
1605 | } | ||
1606 | } while((n < len) && (n != 0)); | ||
1607 | if (n < len) memset(&buf[n], 0, len - n); | ||
1608 | } | ||
1609 | else { | ||
1610 | n = os_write_file(req->fds[bit], buf, len); | ||
1611 | if(n != len){ | ||
1612 | printk("do_io - write failed err = %d " | ||
1613 | "fd = %d\n", -n, req->fds[bit]); | ||
1614 | req->error = 1; | ||
1615 | return; | ||
1616 | } | ||
1617 | } | ||
1618 | |||
1619 | start = end; | ||
1620 | } while(start < nsectors); | ||
1621 | |||
1622 | req->error = update_bitmap(req); | ||
1623 | } | ||
1624 | |||
1625 | /* Changed in start_io_thread, which is serialized by being called only | ||
1626 | * from ubd_init, which is an initcall. | ||
1627 | */ | ||
1628 | int kernel_fd = -1; | ||
1629 | |||
1630 | /* Only changed by the io thread */ | ||
1631 | int io_count = 0; | ||
1632 | |||
1633 | int io_thread(void *arg) | ||
1634 | { | ||
1635 | struct io_thread_req req; | ||
1636 | int n; | ||
1637 | |||
1638 | ignore_sigwinch_sig(); | ||
1639 | while(1){ | ||
1640 | n = os_read_file(kernel_fd, &req, sizeof(req)); | ||
1641 | if(n != sizeof(req)){ | ||
1642 | if(n < 0) | ||
1643 | printk("io_thread - read failed, fd = %d, " | ||
1644 | "err = %d\n", kernel_fd, -n); | ||
1645 | else { | ||
1646 | printk("io_thread - short read, fd = %d, " | ||
1647 | "length = %d\n", kernel_fd, n); | ||
1648 | } | ||
1649 | continue; | ||
1650 | } | ||
1651 | io_count++; | ||
1652 | do_io(&req); | ||
1653 | n = os_write_file(kernel_fd, &req, sizeof(req)); | ||
1654 | if(n != sizeof(req)) | ||
1655 | printk("io_thread - write failed, fd = %d, err = %d\n", | ||
1656 | kernel_fd, -n); | ||
1657 | } | ||
1658 | } | ||
1659 | |||
1660 | /* | ||
1661 | * Overrides for Emacs so that we follow Linus's tabbing style. | ||
1662 | * Emacs will notice this stuff at the end of the file and automatically | ||
1663 | * adjust the settings for this buffer only. This must remain at the end | ||
1664 | * of the file. | ||
1665 | * --------------------------------------------------------------------------- | ||
1666 | * Local variables: | ||
1667 | * c-file-style: "linux" | ||
1668 | * End: | ||
1669 | */ | ||