diff options
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r-- | drivers/block/loop.c | 1348 |
1 files changed, 1348 insertions, 0 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c new file mode 100644 index 000000000000..6f011d0d8e97 --- /dev/null +++ b/drivers/block/loop.c | |||
@@ -0,0 +1,1348 @@ | |||
1 | /* | ||
2 | * linux/drivers/block/loop.c | ||
3 | * | ||
4 | * Written by Theodore Ts'o, 3/29/93 | ||
5 | * | ||
6 | * Copyright 1993 by Theodore Ts'o. Redistribution of this file is | ||
7 | * permitted under the GNU General Public License. | ||
8 | * | ||
9 | * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 | ||
10 | * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 | ||
11 | * | ||
12 | * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 | ||
13 | * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 | ||
14 | * | ||
15 | * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 | ||
16 | * | ||
17 | * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 | ||
18 | * | ||
19 | * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 | ||
20 | * | ||
21 | * Loadable modules and other fixes by AK, 1998 | ||
22 | * | ||
23 | * Make real block number available to downstream transfer functions, enables | ||
24 | * CBC (and relatives) mode encryption requiring unique IVs per data block. | ||
25 | * Reed H. Petty, rhp@draper.net | ||
26 | * | ||
27 | * Maximum number of loop devices now dynamic via max_loop module parameter. | ||
28 | * Russell Kroll <rkroll@exploits.org> 19990701 | ||
29 | * | ||
30 | * Maximum number of loop devices when compiled-in now selectable by passing | ||
31 | * max_loop=<1-255> to the kernel on boot. | ||
32 | * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 | ||
33 | * | ||
34 | * Completely rewrite request handling to be make_request_fn style and | ||
35 | * non blocking, pushing work to a helper thread. Lots of fixes from | ||
36 | * Al Viro too. | ||
37 | * Jens Axboe <axboe@suse.de>, Nov 2000 | ||
38 | * | ||
39 | * Support up to 256 loop devices | ||
40 | * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 | ||
41 | * | ||
42 | * Support for falling back on the write file operation when the address space | ||
43 | * operations prepare_write and/or commit_write are not available on the | ||
44 | * backing filesystem. | ||
45 | * Anton Altaparmakov, 16 Feb 2005 | ||
46 | * | ||
47 | * Still To Fix: | ||
48 | * - Advisory locking is ignored here. | ||
49 | * - Should use an own CAP_* category instead of CAP_SYS_ADMIN | ||
50 | * | ||
51 | */ | ||
52 | |||
53 | #include <linux/config.h> | ||
54 | #include <linux/module.h> | ||
55 | #include <linux/moduleparam.h> | ||
56 | #include <linux/sched.h> | ||
57 | #include <linux/fs.h> | ||
58 | #include <linux/file.h> | ||
59 | #include <linux/stat.h> | ||
60 | #include <linux/errno.h> | ||
61 | #include <linux/major.h> | ||
62 | #include <linux/wait.h> | ||
63 | #include <linux/blkdev.h> | ||
64 | #include <linux/blkpg.h> | ||
65 | #include <linux/init.h> | ||
66 | #include <linux/devfs_fs_kernel.h> | ||
67 | #include <linux/smp_lock.h> | ||
68 | #include <linux/swap.h> | ||
69 | #include <linux/slab.h> | ||
70 | #include <linux/loop.h> | ||
71 | #include <linux/suspend.h> | ||
72 | #include <linux/writeback.h> | ||
73 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | ||
74 | #include <linux/completion.h> | ||
75 | #include <linux/highmem.h> | ||
76 | #include <linux/gfp.h> | ||
77 | |||
78 | #include <asm/uaccess.h> | ||
79 | |||
/* Number of loop devices; defaults to 8. The file header describes it as settable through the max_loop parameter. */
80 | static int max_loop = 8; | ||
/* Loop device state. NOTE(review): presumably an array with one entry per device - the allocation is not visible here. */
81 | static struct loop_device *loop_dev; | ||
/* gendisk pointers, indexed by lo->lo_number (see the set_capacity() callers in this file). */
82 | static struct gendisk **disks; | ||
83 | |||
84 | /* | ||
85 | * Transfer functions | ||
86 | */ | ||
87 | static int transfer_none(struct loop_device *lo, int cmd, | ||
88 | struct page *raw_page, unsigned raw_off, | ||
89 | struct page *loop_page, unsigned loop_off, | ||
90 | int size, sector_t real_block) | ||
91 | { | ||
92 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; | ||
93 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; | ||
94 | |||
95 | if (cmd == READ) | ||
96 | memcpy(loop_buf, raw_buf, size); | ||
97 | else | ||
98 | memcpy(raw_buf, loop_buf, size); | ||
99 | |||
100 | kunmap_atomic(raw_buf, KM_USER0); | ||
101 | kunmap_atomic(loop_buf, KM_USER1); | ||
102 | cond_resched(); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static int transfer_xor(struct loop_device *lo, int cmd, | ||
107 | struct page *raw_page, unsigned raw_off, | ||
108 | struct page *loop_page, unsigned loop_off, | ||
109 | int size, sector_t real_block) | ||
110 | { | ||
111 | char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; | ||
112 | char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; | ||
113 | char *in, *out, *key; | ||
114 | int i, keysize; | ||
115 | |||
116 | if (cmd == READ) { | ||
117 | in = raw_buf; | ||
118 | out = loop_buf; | ||
119 | } else { | ||
120 | in = loop_buf; | ||
121 | out = raw_buf; | ||
122 | } | ||
123 | |||
124 | key = lo->lo_encrypt_key; | ||
125 | keysize = lo->lo_encrypt_key_size; | ||
126 | for (i = 0; i < size; i++) | ||
127 | *out++ = *in++ ^ key[(i & 511) % keysize]; | ||
128 | |||
129 | kunmap_atomic(raw_buf, KM_USER0); | ||
130 | kunmap_atomic(loop_buf, KM_USER1); | ||
131 | cond_resched(); | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static int xor_init(struct loop_device *lo, const struct loop_info64 *info) | ||
136 | { | ||
137 | if (unlikely(info->lo_encrypt_key_size <= 0)) | ||
138 | return -EINVAL; | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static struct loop_func_table none_funcs = { | ||
143 | .number = LO_CRYPT_NONE, | ||
144 | .transfer = transfer_none, | ||
145 | }; | ||
146 | |||
147 | static struct loop_func_table xor_funcs = { | ||
148 | .number = LO_CRYPT_XOR, | ||
149 | .transfer = transfer_xor, | ||
150 | .init = xor_init | ||
151 | }; | ||
152 | |||
153 | /* xfer_funcs[0] is special - its release function is never called */ | ||
154 | static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { | ||
155 | &none_funcs, | ||
156 | &xor_funcs | ||
157 | }; | ||
158 | |||
159 | static loff_t get_loop_size(struct loop_device *lo, struct file *file) | ||
160 | { | ||
161 | loff_t size, offset, loopsize; | ||
162 | |||
163 | /* Compute loopsize in bytes */ | ||
164 | size = i_size_read(file->f_mapping->host); | ||
165 | offset = lo->lo_offset; | ||
166 | loopsize = size - offset; | ||
167 | if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) | ||
168 | loopsize = lo->lo_sizelimit; | ||
169 | |||
170 | /* | ||
171 | * Unfortunately, if we want to do I/O on the device, | ||
172 | * the number of 512-byte sectors has to fit into a sector_t. | ||
173 | */ | ||
174 | return loopsize >> 9; | ||
175 | } | ||
176 | |||
177 | static int | ||
178 | figure_loop_size(struct loop_device *lo) | ||
179 | { | ||
180 | loff_t size = get_loop_size(lo, lo->lo_backing_file); | ||
181 | sector_t x = (sector_t)size; | ||
182 | |||
183 | if (unlikely((loff_t)x != size)) | ||
184 | return -EFBIG; | ||
185 | |||
186 | set_capacity(disks[lo->lo_number], x); | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static inline int | ||
191 | lo_do_transfer(struct loop_device *lo, int cmd, | ||
192 | struct page *rpage, unsigned roffs, | ||
193 | struct page *lpage, unsigned loffs, | ||
194 | int size, sector_t rblock) | ||
195 | { | ||
196 | if (unlikely(!lo->transfer)) | ||
197 | return 0; | ||
198 | |||
199 | return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); | ||
200 | } | ||
201 | |||
202 | /** | ||
203 | * do_lo_send_aops - helper for writing data to a loop device | ||
204 | * | ||
205 | * This is the fast version for backing filesystems which implement the address | ||
206 | * space operations prepare_write and commit_write. | ||
207 | */ | ||
208 | static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | ||
209 | int bsize, loff_t pos, struct page *page) | ||
210 | { | ||
211 | struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ | ||
212 | struct address_space *mapping = file->f_mapping; | ||
213 | struct address_space_operations *aops = mapping->a_ops; | ||
214 | pgoff_t index; | ||
215 | unsigned offset, bv_offs; | ||
216 | int len, ret = 0; | ||
217 | |||
218 | down(&mapping->host->i_sem); | ||
219 | index = pos >> PAGE_CACHE_SHIFT; | ||
220 | offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); | ||
221 | bv_offs = bvec->bv_offset; | ||
222 | len = bvec->bv_len; | ||
223 | while (len > 0) { | ||
224 | sector_t IV; | ||
225 | unsigned size; | ||
226 | int transfer_result; | ||
227 | |||
228 | IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); | ||
229 | size = PAGE_CACHE_SIZE - offset; | ||
230 | if (size > len) | ||
231 | size = len; | ||
232 | page = grab_cache_page(mapping, index); | ||
233 | if (unlikely(!page)) | ||
234 | goto fail; | ||
235 | if (unlikely(aops->prepare_write(file, page, offset, | ||
236 | offset + size))) | ||
237 | goto unlock; | ||
238 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, | ||
239 | bvec->bv_page, bv_offs, size, IV); | ||
240 | if (unlikely(transfer_result)) { | ||
241 | char *kaddr; | ||
242 | |||
243 | /* | ||
244 | * The transfer failed, but we still write the data to | ||
245 | * keep prepare/commit calls balanced. | ||
246 | */ | ||
247 | printk(KERN_ERR "loop: transfer error block %llu\n", | ||
248 | (unsigned long long)index); | ||
249 | kaddr = kmap_atomic(page, KM_USER0); | ||
250 | memset(kaddr + offset, 0, size); | ||
251 | kunmap_atomic(kaddr, KM_USER0); | ||
252 | } | ||
253 | flush_dcache_page(page); | ||
254 | if (unlikely(aops->commit_write(file, page, offset, | ||
255 | offset + size))) | ||
256 | goto unlock; | ||
257 | if (unlikely(transfer_result)) | ||
258 | goto unlock; | ||
259 | bv_offs += size; | ||
260 | len -= size; | ||
261 | offset = 0; | ||
262 | index++; | ||
263 | pos += size; | ||
264 | unlock_page(page); | ||
265 | page_cache_release(page); | ||
266 | } | ||
267 | out: | ||
268 | up(&mapping->host->i_sem); | ||
269 | return ret; | ||
270 | unlock: | ||
271 | unlock_page(page); | ||
272 | page_cache_release(page); | ||
273 | fail: | ||
274 | ret = -1; | ||
275 | goto out; | ||
276 | } | ||
277 | |||
278 | /** | ||
279 | * __do_lo_send_write - helper for writing data to a loop device | ||
280 | * | ||
281 | * This helper just factors out common code between do_lo_send_direct_write() | ||
282 | * and do_lo_send_write(). | ||
283 | */ | ||
284 | static inline int __do_lo_send_write(struct file *file, | ||
285 | u8 __user *buf, const int len, loff_t pos) | ||
286 | { | ||
287 | ssize_t bw; | ||
288 | mm_segment_t old_fs = get_fs(); | ||
289 | |||
290 | set_fs(get_ds()); | ||
291 | bw = file->f_op->write(file, buf, len, &pos); | ||
292 | set_fs(old_fs); | ||
293 | if (likely(bw == len)) | ||
294 | return 0; | ||
295 | printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", | ||
296 | (unsigned long long)pos, len); | ||
297 | if (bw >= 0) | ||
298 | bw = -EIO; | ||
299 | return bw; | ||
300 | } | ||
301 | |||
302 | /** | ||
303 | * do_lo_send_direct_write - helper for writing data to a loop device | ||
304 | * | ||
305 | * This is the fast, non-transforming version for backing filesystems which do | ||
306 | * not implement the address space operations prepare_write and commit_write. | ||
307 | * It uses the write file operation which should be present on all writeable | ||
308 | * filesystems. | ||
309 | */ | ||
310 | static int do_lo_send_direct_write(struct loop_device *lo, | ||
311 | struct bio_vec *bvec, int bsize, loff_t pos, struct page *page) | ||
312 | { | ||
313 | ssize_t bw = __do_lo_send_write(lo->lo_backing_file, | ||
314 | (u8 __user *)kmap(bvec->bv_page) + bvec->bv_offset, | ||
315 | bvec->bv_len, pos); | ||
316 | kunmap(bvec->bv_page); | ||
317 | cond_resched(); | ||
318 | return bw; | ||
319 | } | ||
320 | |||
321 | /** | ||
322 | * do_lo_send_write - helper for writing data to a loop device | ||
323 | * | ||
324 | * This is the slow, transforming version for filesystems which do not | ||
325 | * implement the address space operations prepare_write and commit_write. It | ||
326 | * uses the write file operation which should be present on all writeable | ||
327 | * filesystems. | ||
328 | * | ||
329 | * Using fops->write is slower than using aops->{prepare,commit}_write in the | ||
330 | * transforming case because we need to double buffer the data as we cannot do | ||
331 | * the transformations in place as we do not have direct access to the | ||
332 | * destination pages of the backing file. | ||
333 | */ | ||
334 | static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, | ||
335 | int bsize, loff_t pos, struct page *page) | ||
336 | { | ||
337 | int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, | ||
338 | bvec->bv_offset, bvec->bv_len, pos >> 9); | ||
339 | if (likely(!ret)) | ||
340 | return __do_lo_send_write(lo->lo_backing_file, | ||
341 | (u8 __user *)page_address(page), bvec->bv_len, | ||
342 | pos); | ||
343 | printk(KERN_ERR "loop: Transfer error at byte offset %llu, " | ||
344 | "length %i.\n", (unsigned long long)pos, bvec->bv_len); | ||
345 | if (ret > 0) | ||
346 | ret = -EIO; | ||
347 | return ret; | ||
348 | } | ||
349 | |||
350 | static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, | ||
351 | loff_t pos) | ||
352 | { | ||
353 | int (*do_lo_send)(struct loop_device *, struct bio_vec *, int, loff_t, | ||
354 | struct page *page); | ||
355 | struct bio_vec *bvec; | ||
356 | struct page *page = NULL; | ||
357 | int i, ret = 0; | ||
358 | |||
359 | do_lo_send = do_lo_send_aops; | ||
360 | if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { | ||
361 | do_lo_send = do_lo_send_direct_write; | ||
362 | if (lo->transfer != transfer_none) { | ||
363 | page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); | ||
364 | if (unlikely(!page)) | ||
365 | goto fail; | ||
366 | kmap(page); | ||
367 | do_lo_send = do_lo_send_write; | ||
368 | } | ||
369 | } | ||
370 | bio_for_each_segment(bvec, bio, i) { | ||
371 | ret = do_lo_send(lo, bvec, bsize, pos, page); | ||
372 | if (ret < 0) | ||
373 | break; | ||
374 | pos += bvec->bv_len; | ||
375 | } | ||
376 | if (page) { | ||
377 | kunmap(page); | ||
378 | __free_page(page); | ||
379 | } | ||
380 | out: | ||
381 | return ret; | ||
382 | fail: | ||
383 | printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); | ||
384 | ret = -ENOMEM; | ||
385 | goto out; | ||
386 | } | ||
387 | |||
/* Context that do_lo_receive() hands to lo_read_actor() via the read descriptor. */
388 | struct lo_read_data { | ||
/* loop device whose transfer function is applied */
389 | struct loop_device *lo; | ||
/* destination page (the bio segment's page) */
390 | struct page *page; | ||
/* current byte offset into the destination page; advanced by the actor */
391 | unsigned offset; | ||
/* block size recorded by do_lo_receive(); not read by the actor shown in this file */
392 | int bsize; | ||
393 | }; | ||
394 | |||
395 | static int | ||
396 | lo_read_actor(read_descriptor_t *desc, struct page *page, | ||
397 | unsigned long offset, unsigned long size) | ||
398 | { | ||
399 | unsigned long count = desc->count; | ||
400 | struct lo_read_data *p = desc->arg.data; | ||
401 | struct loop_device *lo = p->lo; | ||
402 | sector_t IV; | ||
403 | |||
404 | IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); | ||
405 | |||
406 | if (size > count) | ||
407 | size = count; | ||
408 | |||
409 | if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) { | ||
410 | size = 0; | ||
411 | printk(KERN_ERR "loop: transfer error block %ld\n", | ||
412 | page->index); | ||
413 | desc->error = -EINVAL; | ||
414 | } | ||
415 | |||
416 | flush_dcache_page(p->page); | ||
417 | |||
418 | desc->count = count - size; | ||
419 | desc->written += size; | ||
420 | p->offset += size; | ||
421 | return size; | ||
422 | } | ||
423 | |||
424 | static int | ||
425 | do_lo_receive(struct loop_device *lo, | ||
426 | struct bio_vec *bvec, int bsize, loff_t pos) | ||
427 | { | ||
428 | struct lo_read_data cookie; | ||
429 | struct file *file; | ||
430 | int retval; | ||
431 | |||
432 | cookie.lo = lo; | ||
433 | cookie.page = bvec->bv_page; | ||
434 | cookie.offset = bvec->bv_offset; | ||
435 | cookie.bsize = bsize; | ||
436 | file = lo->lo_backing_file; | ||
437 | retval = file->f_op->sendfile(file, &pos, bvec->bv_len, | ||
438 | lo_read_actor, &cookie); | ||
439 | return (retval < 0)? retval: 0; | ||
440 | } | ||
441 | |||
442 | static int | ||
443 | lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) | ||
444 | { | ||
445 | struct bio_vec *bvec; | ||
446 | int i, ret = 0; | ||
447 | |||
448 | bio_for_each_segment(bvec, bio, i) { | ||
449 | ret = do_lo_receive(lo, bvec, bsize, pos); | ||
450 | if (ret < 0) | ||
451 | break; | ||
452 | pos += bvec->bv_len; | ||
453 | } | ||
454 | return ret; | ||
455 | } | ||
456 | |||
457 | static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) | ||
458 | { | ||
459 | loff_t pos; | ||
460 | int ret; | ||
461 | |||
462 | pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; | ||
463 | if (bio_rw(bio) == WRITE) | ||
464 | ret = lo_send(lo, bio, lo->lo_blocksize, pos); | ||
465 | else | ||
466 | ret = lo_receive(lo, bio, lo->lo_blocksize, pos); | ||
467 | return ret; | ||
468 | } | ||
469 | |||
470 | /* | ||
471 | * Add bio to back of pending list | ||
472 | */ | ||
473 | static void loop_add_bio(struct loop_device *lo, struct bio *bio) | ||
474 | { | ||
475 | unsigned long flags; | ||
476 | |||
477 | spin_lock_irqsave(&lo->lo_lock, flags); | ||
478 | if (lo->lo_biotail) { | ||
479 | lo->lo_biotail->bi_next = bio; | ||
480 | lo->lo_biotail = bio; | ||
481 | } else | ||
482 | lo->lo_bio = lo->lo_biotail = bio; | ||
483 | spin_unlock_irqrestore(&lo->lo_lock, flags); | ||
484 | |||
485 | up(&lo->lo_bh_mutex); | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Grab first pending buffer | ||
490 | */ | ||
491 | static struct bio *loop_get_bio(struct loop_device *lo) | ||
492 | { | ||
493 | struct bio *bio; | ||
494 | |||
495 | spin_lock_irq(&lo->lo_lock); | ||
496 | if ((bio = lo->lo_bio)) { | ||
497 | if (bio == lo->lo_biotail) | ||
498 | lo->lo_biotail = NULL; | ||
499 | lo->lo_bio = bio->bi_next; | ||
500 | bio->bi_next = NULL; | ||
501 | } | ||
502 | spin_unlock_irq(&lo->lo_lock); | ||
503 | |||
504 | return bio; | ||
505 | } | ||
506 | |||
507 | static int loop_make_request(request_queue_t *q, struct bio *old_bio) | ||
508 | { | ||
509 | struct loop_device *lo = q->queuedata; | ||
510 | int rw = bio_rw(old_bio); | ||
511 | |||
512 | if (!lo) | ||
513 | goto out; | ||
514 | |||
515 | spin_lock_irq(&lo->lo_lock); | ||
516 | if (lo->lo_state != Lo_bound) | ||
517 | goto inactive; | ||
518 | atomic_inc(&lo->lo_pending); | ||
519 | spin_unlock_irq(&lo->lo_lock); | ||
520 | |||
521 | if (rw == WRITE) { | ||
522 | if (lo->lo_flags & LO_FLAGS_READ_ONLY) | ||
523 | goto err; | ||
524 | } else if (rw == READA) { | ||
525 | rw = READ; | ||
526 | } else if (rw != READ) { | ||
527 | printk(KERN_ERR "loop: unknown command (%x)\n", rw); | ||
528 | goto err; | ||
529 | } | ||
530 | loop_add_bio(lo, old_bio); | ||
531 | return 0; | ||
532 | err: | ||
533 | if (atomic_dec_and_test(&lo->lo_pending)) | ||
534 | up(&lo->lo_bh_mutex); | ||
535 | out: | ||
536 | bio_io_error(old_bio, old_bio->bi_size); | ||
537 | return 0; | ||
538 | inactive: | ||
539 | spin_unlock_irq(&lo->lo_lock); | ||
540 | goto out; | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * kick off io on the underlying address space | ||
545 | */ | ||
546 | static void loop_unplug(request_queue_t *q) | ||
547 | { | ||
548 | struct loop_device *lo = q->queuedata; | ||
549 | |||
550 | clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); | ||
551 | blk_run_address_space(lo->lo_backing_file->f_mapping); | ||
552 | } | ||
553 | |||
/* Payload of the special bio that loop_switch() queues to request a backing-file switch. */
554 | struct switch_request { | ||
/* the new backing file */
555 | struct file *file; | ||
/* completed by do_loop_switch() once the switch has been made */
556 | struct completion wait; | ||
557 | }; | ||
558 | |||
/* Defined further down; declared here because loop_handle_bio() calls it. */
559 | static void do_loop_switch(struct loop_device *, struct switch_request *); | ||
560 | |||
561 | static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) | ||
562 | { | ||
563 | int ret; | ||
564 | |||
565 | if (unlikely(!bio->bi_bdev)) { | ||
566 | do_loop_switch(lo, bio->bi_private); | ||
567 | bio_put(bio); | ||
568 | } else { | ||
569 | ret = do_bio_filebacked(lo, bio); | ||
570 | bio_endio(bio, bio->bi_size, ret); | ||
571 | } | ||
572 | } | ||
573 | |||
574 | /* | ||
575 | * worker thread that handles reads/writes to file backed loop devices, | ||
576 | * to avoid blocking in our make_request_fn. it also does loop decrypting | ||
577 | * on reads for block backed loop, as that is too heavy to do from | ||
578 | * b_end_io context where irqs may be disabled. | ||
579 | */ | ||
580 | static int loop_thread(void *data) | ||
581 | { | ||
582 | struct loop_device *lo = data; | ||
583 | struct bio *bio; | ||
584 | |||
585 | daemonize("loop%d", lo->lo_number); | ||
586 | |||
587 | /* | ||
588 | * loop can be used in an encrypted device, | ||
589 | * hence, it mustn't be stopped at all | ||
590 | * because it could be indirectly used during suspension | ||
591 | */ | ||
592 | current->flags |= PF_NOFREEZE; | ||
593 | |||
594 | set_user_nice(current, -20); | ||
595 | |||
596 | lo->lo_state = Lo_bound; | ||
597 | atomic_inc(&lo->lo_pending); | ||
598 | |||
599 | /* | ||
600 | * up sem, we are running | ||
601 | */ | ||
602 | up(&lo->lo_sem); | ||
603 | |||
604 | for (;;) { | ||
605 | down_interruptible(&lo->lo_bh_mutex); | ||
606 | /* | ||
607 | * could be upped because of tear-down, not because of | ||
608 | * pending work | ||
609 | */ | ||
610 | if (!atomic_read(&lo->lo_pending)) | ||
611 | break; | ||
612 | |||
613 | bio = loop_get_bio(lo); | ||
614 | if (!bio) { | ||
615 | printk("loop: missing bio\n"); | ||
616 | continue; | ||
617 | } | ||
618 | loop_handle_bio(lo, bio); | ||
619 | |||
620 | /* | ||
621 | * upped both for pending work and tear-down, lo_pending | ||
622 | * will hit zero then | ||
623 | */ | ||
624 | if (atomic_dec_and_test(&lo->lo_pending)) | ||
625 | break; | ||
626 | } | ||
627 | |||
628 | up(&lo->lo_sem); | ||
629 | return 0; | ||
630 | } | ||
631 | |||
632 | /* | ||
633 | * loop_switch performs the hard work of switching a backing store. | ||
634 | * First it needs to flush existing IO, it does this by sending a magic | ||
635 | * BIO down the pipe. The completion of this BIO does the actual switch. | ||
636 | */ | ||
637 | static int loop_switch(struct loop_device *lo, struct file *file) | ||
638 | { | ||
639 | struct switch_request w; | ||
640 | struct bio *bio = bio_alloc(GFP_KERNEL, 1); | ||
641 | if (!bio) | ||
642 | return -ENOMEM; | ||
643 | init_completion(&w.wait); | ||
644 | w.file = file; | ||
645 | bio->bi_private = &w; | ||
646 | bio->bi_bdev = NULL; | ||
647 | loop_make_request(lo->lo_queue, bio); | ||
648 | wait_for_completion(&w.wait); | ||
649 | return 0; | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * Do the actual switch; called from the BIO completion routine | ||
654 | */ | ||
655 | static void do_loop_switch(struct loop_device *lo, struct switch_request *p) | ||
656 | { | ||
657 | struct file *file = p->file; | ||
658 | struct file *old_file = lo->lo_backing_file; | ||
659 | struct address_space *mapping = file->f_mapping; | ||
660 | |||
661 | mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); | ||
662 | lo->lo_backing_file = file; | ||
663 | lo->lo_blocksize = mapping->host->i_blksize; | ||
664 | lo->old_gfp_mask = mapping_gfp_mask(mapping); | ||
665 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); | ||
666 | complete(&p->wait); | ||
667 | } | ||
668 | |||
669 | |||
670 | /* | ||
671 | * loop_change_fd switches the backing store of a loopback device to | ||
672 | * a new file. This is useful for operating system installers to free up | ||
673 | * the original file and in High Availability environments to switch to | ||
674 | * an alternative location for the content in case of server meltdown. | ||
675 | * This can only work if the loop device is used read-only, and if the | ||
676 | * new backing store is the same size and type as the old backing store. | ||
677 | */ | ||
678 | static int loop_change_fd(struct loop_device *lo, struct file *lo_file, | ||
679 | struct block_device *bdev, unsigned int arg) | ||
680 | { | ||
681 | struct file *file, *old_file; | ||
682 | struct inode *inode; | ||
683 | int error; | ||
684 | |||
685 | error = -ENXIO; | ||
686 | if (lo->lo_state != Lo_bound) | ||
687 | goto out; | ||
688 | |||
689 | /* the loop device has to be read-only */ | ||
690 | error = -EINVAL; | ||
691 | if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) | ||
692 | goto out; | ||
693 | |||
694 | error = -EBADF; | ||
695 | file = fget(arg); | ||
696 | if (!file) | ||
697 | goto out; | ||
698 | |||
699 | inode = file->f_mapping->host; | ||
700 | old_file = lo->lo_backing_file; | ||
701 | |||
702 | error = -EINVAL; | ||
703 | |||
704 | if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) | ||
705 | goto out_putf; | ||
706 | |||
707 | /* new backing store needs to support loop (eg sendfile) */ | ||
708 | if (!inode->i_fop->sendfile) | ||
709 | goto out_putf; | ||
710 | |||
711 | /* size of the new backing store needs to be the same */ | ||
712 | if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) | ||
713 | goto out_putf; | ||
714 | |||
715 | /* and ... switch */ | ||
716 | error = loop_switch(lo, file); | ||
717 | if (error) | ||
718 | goto out_putf; | ||
719 | |||
720 | fput(old_file); | ||
721 | return 0; | ||
722 | |||
723 | out_putf: | ||
724 | fput(file); | ||
725 | out: | ||
726 | return error; | ||
727 | } | ||
728 | |||
729 | static inline int is_loop_device(struct file *file) | ||
730 | { | ||
731 | struct inode *i = file->f_mapping->host; | ||
732 | |||
733 | return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; | ||
734 | } | ||
735 | |||
736 | static int loop_set_fd(struct loop_device *lo, struct file *lo_file, | ||
737 | struct block_device *bdev, unsigned int arg) | ||
738 | { | ||
739 | struct file *file, *f; | ||
740 | struct inode *inode; | ||
741 | struct address_space *mapping; | ||
742 | unsigned lo_blocksize; | ||
743 | int lo_flags = 0; | ||
744 | int error; | ||
745 | loff_t size; | ||
746 | |||
747 | /* This is safe, since we have a reference from open(). */ | ||
748 | __module_get(THIS_MODULE); | ||
749 | |||
750 | error = -EBADF; | ||
751 | file = fget(arg); | ||
752 | if (!file) | ||
753 | goto out; | ||
754 | |||
755 | error = -EBUSY; | ||
756 | if (lo->lo_state != Lo_unbound) | ||
757 | goto out_putf; | ||
758 | |||
759 | /* Avoid recursion */ | ||
760 | f = file; | ||
761 | while (is_loop_device(f)) { | ||
762 | struct loop_device *l; | ||
763 | |||
764 | if (f->f_mapping->host->i_rdev == lo_file->f_mapping->host->i_rdev) | ||
765 | goto out_putf; | ||
766 | |||
767 | l = f->f_mapping->host->i_bdev->bd_disk->private_data; | ||
768 | if (l->lo_state == Lo_unbound) { | ||
769 | error = -EINVAL; | ||
770 | goto out_putf; | ||
771 | } | ||
772 | f = l->lo_backing_file; | ||
773 | } | ||
774 | |||
775 | mapping = file->f_mapping; | ||
776 | inode = mapping->host; | ||
777 | |||
778 | if (!(file->f_mode & FMODE_WRITE)) | ||
779 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
780 | |||
781 | error = -EINVAL; | ||
782 | if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { | ||
783 | struct address_space_operations *aops = mapping->a_ops; | ||
784 | /* | ||
785 | * If we can't read - sorry. If we only can't write - well, | ||
786 | * it's going to be read-only. | ||
787 | */ | ||
788 | if (!file->f_op->sendfile) | ||
789 | goto out_putf; | ||
790 | if (aops->prepare_write && aops->commit_write) | ||
791 | lo_flags |= LO_FLAGS_USE_AOPS; | ||
792 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) | ||
793 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
794 | |||
795 | lo_blocksize = inode->i_blksize; | ||
796 | error = 0; | ||
797 | } else { | ||
798 | goto out_putf; | ||
799 | } | ||
800 | |||
801 | size = get_loop_size(lo, file); | ||
802 | |||
803 | if ((loff_t)(sector_t)size != size) { | ||
804 | error = -EFBIG; | ||
805 | goto out_putf; | ||
806 | } | ||
807 | |||
808 | if (!(lo_file->f_mode & FMODE_WRITE)) | ||
809 | lo_flags |= LO_FLAGS_READ_ONLY; | ||
810 | |||
811 | set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); | ||
812 | |||
813 | lo->lo_blocksize = lo_blocksize; | ||
814 | lo->lo_device = bdev; | ||
815 | lo->lo_flags = lo_flags; | ||
816 | lo->lo_backing_file = file; | ||
817 | lo->transfer = NULL; | ||
818 | lo->ioctl = NULL; | ||
819 | lo->lo_sizelimit = 0; | ||
820 | lo->old_gfp_mask = mapping_gfp_mask(mapping); | ||
821 | mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); | ||
822 | |||
823 | lo->lo_bio = lo->lo_biotail = NULL; | ||
824 | |||
825 | /* | ||
826 | * set queue make_request_fn, and add limits based on lower level | ||
827 | * device | ||
828 | */ | ||
829 | blk_queue_make_request(lo->lo_queue, loop_make_request); | ||
830 | lo->lo_queue->queuedata = lo; | ||
831 | lo->lo_queue->unplug_fn = loop_unplug; | ||
832 | |||
833 | set_capacity(disks[lo->lo_number], size); | ||
834 | bd_set_size(bdev, size << 9); | ||
835 | |||
836 | set_blocksize(bdev, lo_blocksize); | ||
837 | |||
838 | kernel_thread(loop_thread, lo, CLONE_KERNEL); | ||
839 | down(&lo->lo_sem); | ||
840 | return 0; | ||
841 | |||
842 | out_putf: | ||
843 | fput(file); | ||
844 | out: | ||
845 | /* This is safe: open() is still holding a reference. */ | ||
846 | module_put(THIS_MODULE); | ||
847 | return error; | ||
848 | } | ||
849 | |||
850 | static int | ||
851 | loop_release_xfer(struct loop_device *lo) | ||
852 | { | ||
853 | int err = 0; | ||
854 | struct loop_func_table *xfer = lo->lo_encryption; | ||
855 | |||
856 | if (xfer) { | ||
857 | if (xfer->release) | ||
858 | err = xfer->release(lo); | ||
859 | lo->transfer = NULL; | ||
860 | lo->lo_encryption = NULL; | ||
861 | module_put(xfer->owner); | ||
862 | } | ||
863 | return err; | ||
864 | } | ||
865 | |||
866 | static int | ||
867 | loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, | ||
868 | const struct loop_info64 *i) | ||
869 | { | ||
870 | int err = 0; | ||
871 | |||
872 | if (xfer) { | ||
873 | struct module *owner = xfer->owner; | ||
874 | |||
875 | if (!try_module_get(owner)) | ||
876 | return -EINVAL; | ||
877 | if (xfer->init) | ||
878 | err = xfer->init(lo, i); | ||
879 | if (err) | ||
880 | module_put(owner); | ||
881 | else | ||
882 | lo->lo_encryption = xfer; | ||
883 | } | ||
884 | return err; | ||
885 | } | ||
886 | |||
887 | static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) | ||
888 | { | ||
889 | struct file *filp = lo->lo_backing_file; | ||
890 | int gfp = lo->old_gfp_mask; | ||
891 | |||
892 | if (lo->lo_state != Lo_bound) | ||
893 | return -ENXIO; | ||
894 | |||
895 | if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ | ||
896 | return -EBUSY; | ||
897 | |||
898 | if (filp == NULL) | ||
899 | return -EINVAL; | ||
900 | |||
901 | spin_lock_irq(&lo->lo_lock); | ||
902 | lo->lo_state = Lo_rundown; | ||
903 | if (atomic_dec_and_test(&lo->lo_pending)) | ||
904 | up(&lo->lo_bh_mutex); | ||
905 | spin_unlock_irq(&lo->lo_lock); | ||
906 | |||
907 | down(&lo->lo_sem); | ||
908 | |||
909 | lo->lo_backing_file = NULL; | ||
910 | |||
911 | loop_release_xfer(lo); | ||
912 | lo->transfer = NULL; | ||
913 | lo->ioctl = NULL; | ||
914 | lo->lo_device = NULL; | ||
915 | lo->lo_encryption = NULL; | ||
916 | lo->lo_offset = 0; | ||
917 | lo->lo_sizelimit = 0; | ||
918 | lo->lo_encrypt_key_size = 0; | ||
919 | lo->lo_flags = 0; | ||
920 | memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); | ||
921 | memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); | ||
922 | memset(lo->lo_file_name, 0, LO_NAME_SIZE); | ||
923 | invalidate_bdev(bdev, 0); | ||
924 | set_capacity(disks[lo->lo_number], 0); | ||
925 | bd_set_size(bdev, 0); | ||
926 | mapping_set_gfp_mask(filp->f_mapping, gfp); | ||
927 | lo->lo_state = Lo_unbound; | ||
928 | fput(filp); | ||
929 | /* This is safe: open() is still holding a reference. */ | ||
930 | module_put(THIS_MODULE); | ||
931 | return 0; | ||
932 | } | ||
933 | |||
934 | static int | ||
935 | loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | ||
936 | { | ||
937 | int err; | ||
938 | struct loop_func_table *xfer; | ||
939 | |||
940 | if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid && | ||
941 | !capable(CAP_SYS_ADMIN)) | ||
942 | return -EPERM; | ||
943 | if (lo->lo_state != Lo_bound) | ||
944 | return -ENXIO; | ||
945 | if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) | ||
946 | return -EINVAL; | ||
947 | |||
948 | err = loop_release_xfer(lo); | ||
949 | if (err) | ||
950 | return err; | ||
951 | |||
952 | if (info->lo_encrypt_type) { | ||
953 | unsigned int type = info->lo_encrypt_type; | ||
954 | |||
955 | if (type >= MAX_LO_CRYPT) | ||
956 | return -EINVAL; | ||
957 | xfer = xfer_funcs[type]; | ||
958 | if (xfer == NULL) | ||
959 | return -EINVAL; | ||
960 | } else | ||
961 | xfer = NULL; | ||
962 | |||
963 | err = loop_init_xfer(lo, xfer, info); | ||
964 | if (err) | ||
965 | return err; | ||
966 | |||
967 | if (lo->lo_offset != info->lo_offset || | ||
968 | lo->lo_sizelimit != info->lo_sizelimit) { | ||
969 | lo->lo_offset = info->lo_offset; | ||
970 | lo->lo_sizelimit = info->lo_sizelimit; | ||
971 | if (figure_loop_size(lo)) | ||
972 | return -EFBIG; | ||
973 | } | ||
974 | |||
975 | memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); | ||
976 | memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); | ||
977 | lo->lo_file_name[LO_NAME_SIZE-1] = 0; | ||
978 | lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; | ||
979 | |||
980 | if (!xfer) | ||
981 | xfer = &none_funcs; | ||
982 | lo->transfer = xfer->transfer; | ||
983 | lo->ioctl = xfer->ioctl; | ||
984 | |||
985 | lo->lo_encrypt_key_size = info->lo_encrypt_key_size; | ||
986 | lo->lo_init[0] = info->lo_init[0]; | ||
987 | lo->lo_init[1] = info->lo_init[1]; | ||
988 | if (info->lo_encrypt_key_size) { | ||
989 | memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, | ||
990 | info->lo_encrypt_key_size); | ||
991 | lo->lo_key_owner = current->uid; | ||
992 | } | ||
993 | |||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | static int | ||
998 | loop_get_status(struct loop_device *lo, struct loop_info64 *info) | ||
999 | { | ||
1000 | struct file *file = lo->lo_backing_file; | ||
1001 | struct kstat stat; | ||
1002 | int error; | ||
1003 | |||
1004 | if (lo->lo_state != Lo_bound) | ||
1005 | return -ENXIO; | ||
1006 | error = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); | ||
1007 | if (error) | ||
1008 | return error; | ||
1009 | memset(info, 0, sizeof(*info)); | ||
1010 | info->lo_number = lo->lo_number; | ||
1011 | info->lo_device = huge_encode_dev(stat.dev); | ||
1012 | info->lo_inode = stat.ino; | ||
1013 | info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); | ||
1014 | info->lo_offset = lo->lo_offset; | ||
1015 | info->lo_sizelimit = lo->lo_sizelimit; | ||
1016 | info->lo_flags = lo->lo_flags; | ||
1017 | memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); | ||
1018 | memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); | ||
1019 | info->lo_encrypt_type = | ||
1020 | lo->lo_encryption ? lo->lo_encryption->number : 0; | ||
1021 | if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { | ||
1022 | info->lo_encrypt_key_size = lo->lo_encrypt_key_size; | ||
1023 | memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, | ||
1024 | lo->lo_encrypt_key_size); | ||
1025 | } | ||
1026 | return 0; | ||
1027 | } | ||
1028 | |||
1029 | static void | ||
1030 | loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) | ||
1031 | { | ||
1032 | memset(info64, 0, sizeof(*info64)); | ||
1033 | info64->lo_number = info->lo_number; | ||
1034 | info64->lo_device = info->lo_device; | ||
1035 | info64->lo_inode = info->lo_inode; | ||
1036 | info64->lo_rdevice = info->lo_rdevice; | ||
1037 | info64->lo_offset = info->lo_offset; | ||
1038 | info64->lo_sizelimit = 0; | ||
1039 | info64->lo_encrypt_type = info->lo_encrypt_type; | ||
1040 | info64->lo_encrypt_key_size = info->lo_encrypt_key_size; | ||
1041 | info64->lo_flags = info->lo_flags; | ||
1042 | info64->lo_init[0] = info->lo_init[0]; | ||
1043 | info64->lo_init[1] = info->lo_init[1]; | ||
1044 | if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) | ||
1045 | memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); | ||
1046 | else | ||
1047 | memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); | ||
1048 | memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); | ||
1049 | } | ||
1050 | |||
1051 | static int | ||
1052 | loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) | ||
1053 | { | ||
1054 | memset(info, 0, sizeof(*info)); | ||
1055 | info->lo_number = info64->lo_number; | ||
1056 | info->lo_device = info64->lo_device; | ||
1057 | info->lo_inode = info64->lo_inode; | ||
1058 | info->lo_rdevice = info64->lo_rdevice; | ||
1059 | info->lo_offset = info64->lo_offset; | ||
1060 | info->lo_encrypt_type = info64->lo_encrypt_type; | ||
1061 | info->lo_encrypt_key_size = info64->lo_encrypt_key_size; | ||
1062 | info->lo_flags = info64->lo_flags; | ||
1063 | info->lo_init[0] = info64->lo_init[0]; | ||
1064 | info->lo_init[1] = info64->lo_init[1]; | ||
1065 | if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) | ||
1066 | memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); | ||
1067 | else | ||
1068 | memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); | ||
1069 | memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); | ||
1070 | |||
1071 | /* error in case values were truncated */ | ||
1072 | if (info->lo_device != info64->lo_device || | ||
1073 | info->lo_rdevice != info64->lo_rdevice || | ||
1074 | info->lo_inode != info64->lo_inode || | ||
1075 | info->lo_offset != info64->lo_offset) | ||
1076 | return -EOVERFLOW; | ||
1077 | |||
1078 | return 0; | ||
1079 | } | ||
1080 | |||
1081 | static int | ||
1082 | loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) | ||
1083 | { | ||
1084 | struct loop_info info; | ||
1085 | struct loop_info64 info64; | ||
1086 | |||
1087 | if (copy_from_user(&info, arg, sizeof (struct loop_info))) | ||
1088 | return -EFAULT; | ||
1089 | loop_info64_from_old(&info, &info64); | ||
1090 | return loop_set_status(lo, &info64); | ||
1091 | } | ||
1092 | |||
1093 | static int | ||
1094 | loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) | ||
1095 | { | ||
1096 | struct loop_info64 info64; | ||
1097 | |||
1098 | if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) | ||
1099 | return -EFAULT; | ||
1100 | return loop_set_status(lo, &info64); | ||
1101 | } | ||
1102 | |||
1103 | static int | ||
1104 | loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { | ||
1105 | struct loop_info info; | ||
1106 | struct loop_info64 info64; | ||
1107 | int err = 0; | ||
1108 | |||
1109 | if (!arg) | ||
1110 | err = -EINVAL; | ||
1111 | if (!err) | ||
1112 | err = loop_get_status(lo, &info64); | ||
1113 | if (!err) | ||
1114 | err = loop_info64_to_old(&info64, &info); | ||
1115 | if (!err && copy_to_user(arg, &info, sizeof(info))) | ||
1116 | err = -EFAULT; | ||
1117 | |||
1118 | return err; | ||
1119 | } | ||
1120 | |||
1121 | static int | ||
1122 | loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { | ||
1123 | struct loop_info64 info64; | ||
1124 | int err = 0; | ||
1125 | |||
1126 | if (!arg) | ||
1127 | err = -EINVAL; | ||
1128 | if (!err) | ||
1129 | err = loop_get_status(lo, &info64); | ||
1130 | if (!err && copy_to_user(arg, &info64, sizeof(info64))) | ||
1131 | err = -EFAULT; | ||
1132 | |||
1133 | return err; | ||
1134 | } | ||
1135 | |||
1136 | static int lo_ioctl(struct inode * inode, struct file * file, | ||
1137 | unsigned int cmd, unsigned long arg) | ||
1138 | { | ||
1139 | struct loop_device *lo = inode->i_bdev->bd_disk->private_data; | ||
1140 | int err; | ||
1141 | |||
1142 | down(&lo->lo_ctl_mutex); | ||
1143 | switch (cmd) { | ||
1144 | case LOOP_SET_FD: | ||
1145 | err = loop_set_fd(lo, file, inode->i_bdev, arg); | ||
1146 | break; | ||
1147 | case LOOP_CHANGE_FD: | ||
1148 | err = loop_change_fd(lo, file, inode->i_bdev, arg); | ||
1149 | break; | ||
1150 | case LOOP_CLR_FD: | ||
1151 | err = loop_clr_fd(lo, inode->i_bdev); | ||
1152 | break; | ||
1153 | case LOOP_SET_STATUS: | ||
1154 | err = loop_set_status_old(lo, (struct loop_info __user *) arg); | ||
1155 | break; | ||
1156 | case LOOP_GET_STATUS: | ||
1157 | err = loop_get_status_old(lo, (struct loop_info __user *) arg); | ||
1158 | break; | ||
1159 | case LOOP_SET_STATUS64: | ||
1160 | err = loop_set_status64(lo, (struct loop_info64 __user *) arg); | ||
1161 | break; | ||
1162 | case LOOP_GET_STATUS64: | ||
1163 | err = loop_get_status64(lo, (struct loop_info64 __user *) arg); | ||
1164 | break; | ||
1165 | default: | ||
1166 | err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; | ||
1167 | } | ||
1168 | up(&lo->lo_ctl_mutex); | ||
1169 | return err; | ||
1170 | } | ||
1171 | |||
1172 | static int lo_open(struct inode *inode, struct file *file) | ||
1173 | { | ||
1174 | struct loop_device *lo = inode->i_bdev->bd_disk->private_data; | ||
1175 | |||
1176 | down(&lo->lo_ctl_mutex); | ||
1177 | lo->lo_refcnt++; | ||
1178 | up(&lo->lo_ctl_mutex); | ||
1179 | |||
1180 | return 0; | ||
1181 | } | ||
1182 | |||
1183 | static int lo_release(struct inode *inode, struct file *file) | ||
1184 | { | ||
1185 | struct loop_device *lo = inode->i_bdev->bd_disk->private_data; | ||
1186 | |||
1187 | down(&lo->lo_ctl_mutex); | ||
1188 | --lo->lo_refcnt; | ||
1189 | up(&lo->lo_ctl_mutex); | ||
1190 | |||
1191 | return 0; | ||
1192 | } | ||
1193 | |||
1194 | static struct block_device_operations lo_fops = { | ||
1195 | .owner = THIS_MODULE, | ||
1196 | .open = lo_open, | ||
1197 | .release = lo_release, | ||
1198 | .ioctl = lo_ioctl, | ||
1199 | }; | ||
1200 | |||
1201 | /* | ||
1202 | * And now the modules code and kernel interface. | ||
1203 | */ | ||
1204 | module_param(max_loop, int, 0); | ||
1205 | MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)"); | ||
1206 | MODULE_LICENSE("GPL"); | ||
1207 | MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); | ||
1208 | |||
1209 | int loop_register_transfer(struct loop_func_table *funcs) | ||
1210 | { | ||
1211 | unsigned int n = funcs->number; | ||
1212 | |||
1213 | if (n >= MAX_LO_CRYPT || xfer_funcs[n]) | ||
1214 | return -EINVAL; | ||
1215 | xfer_funcs[n] = funcs; | ||
1216 | return 0; | ||
1217 | } | ||
1218 | |||
1219 | int loop_unregister_transfer(int number) | ||
1220 | { | ||
1221 | unsigned int n = number; | ||
1222 | struct loop_device *lo; | ||
1223 | struct loop_func_table *xfer; | ||
1224 | |||
1225 | if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) | ||
1226 | return -EINVAL; | ||
1227 | |||
1228 | xfer_funcs[n] = NULL; | ||
1229 | |||
1230 | for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { | ||
1231 | down(&lo->lo_ctl_mutex); | ||
1232 | |||
1233 | if (lo->lo_encryption == xfer) | ||
1234 | loop_release_xfer(lo); | ||
1235 | |||
1236 | up(&lo->lo_ctl_mutex); | ||
1237 | } | ||
1238 | |||
1239 | return 0; | ||
1240 | } | ||
1241 | |||
/* Transfer (un)registration is exported for use by other modules. */
EXPORT_SYMBOL(loop_unregister_transfer);
EXPORT_SYMBOL(loop_register_transfer);
1244 | |||
1245 | static int __init loop_init(void) | ||
1246 | { | ||
1247 | int i; | ||
1248 | |||
1249 | if (max_loop < 1 || max_loop > 256) { | ||
1250 | printk(KERN_WARNING "loop: invalid max_loop (must be between" | ||
1251 | " 1 and 256), using default (8)\n"); | ||
1252 | max_loop = 8; | ||
1253 | } | ||
1254 | |||
1255 | if (register_blkdev(LOOP_MAJOR, "loop")) | ||
1256 | return -EIO; | ||
1257 | |||
1258 | loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL); | ||
1259 | if (!loop_dev) | ||
1260 | goto out_mem1; | ||
1261 | memset(loop_dev, 0, max_loop * sizeof(struct loop_device)); | ||
1262 | |||
1263 | disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL); | ||
1264 | if (!disks) | ||
1265 | goto out_mem2; | ||
1266 | |||
1267 | for (i = 0; i < max_loop; i++) { | ||
1268 | disks[i] = alloc_disk(1); | ||
1269 | if (!disks[i]) | ||
1270 | goto out_mem3; | ||
1271 | } | ||
1272 | |||
1273 | devfs_mk_dir("loop"); | ||
1274 | |||
1275 | for (i = 0; i < max_loop; i++) { | ||
1276 | struct loop_device *lo = &loop_dev[i]; | ||
1277 | struct gendisk *disk = disks[i]; | ||
1278 | |||
1279 | memset(lo, 0, sizeof(*lo)); | ||
1280 | lo->lo_queue = blk_alloc_queue(GFP_KERNEL); | ||
1281 | if (!lo->lo_queue) | ||
1282 | goto out_mem4; | ||
1283 | init_MUTEX(&lo->lo_ctl_mutex); | ||
1284 | init_MUTEX_LOCKED(&lo->lo_sem); | ||
1285 | init_MUTEX_LOCKED(&lo->lo_bh_mutex); | ||
1286 | lo->lo_number = i; | ||
1287 | spin_lock_init(&lo->lo_lock); | ||
1288 | disk->major = LOOP_MAJOR; | ||
1289 | disk->first_minor = i; | ||
1290 | disk->fops = &lo_fops; | ||
1291 | sprintf(disk->disk_name, "loop%d", i); | ||
1292 | sprintf(disk->devfs_name, "loop/%d", i); | ||
1293 | disk->private_data = lo; | ||
1294 | disk->queue = lo->lo_queue; | ||
1295 | } | ||
1296 | |||
1297 | /* We cannot fail after we call this, so another loop!*/ | ||
1298 | for (i = 0; i < max_loop; i++) | ||
1299 | add_disk(disks[i]); | ||
1300 | printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); | ||
1301 | return 0; | ||
1302 | |||
1303 | out_mem4: | ||
1304 | while (i--) | ||
1305 | blk_put_queue(loop_dev[i].lo_queue); | ||
1306 | devfs_remove("loop"); | ||
1307 | i = max_loop; | ||
1308 | out_mem3: | ||
1309 | while (i--) | ||
1310 | put_disk(disks[i]); | ||
1311 | kfree(disks); | ||
1312 | out_mem2: | ||
1313 | kfree(loop_dev); | ||
1314 | out_mem1: | ||
1315 | unregister_blkdev(LOOP_MAJOR, "loop"); | ||
1316 | printk(KERN_ERR "loop: ran out of memory\n"); | ||
1317 | return -ENOMEM; | ||
1318 | } | ||
1319 | |||
1320 | static void loop_exit(void) | ||
1321 | { | ||
1322 | int i; | ||
1323 | |||
1324 | for (i = 0; i < max_loop; i++) { | ||
1325 | del_gendisk(disks[i]); | ||
1326 | blk_put_queue(loop_dev[i].lo_queue); | ||
1327 | put_disk(disks[i]); | ||
1328 | } | ||
1329 | devfs_remove("loop"); | ||
1330 | if (unregister_blkdev(LOOP_MAJOR, "loop")) | ||
1331 | printk(KERN_WARNING "loop: cannot unregister blkdev\n"); | ||
1332 | |||
1333 | kfree(disks); | ||
1334 | kfree(loop_dev); | ||
1335 | } | ||
1336 | |||
/* Hook the init and exit routines into the module loader. */
module_exit(loop_exit);
module_init(loop_init);
1339 | |||
1340 | #ifndef MODULE | ||
1341 | static int __init max_loop_setup(char *str) | ||
1342 | { | ||
1343 | max_loop = simple_strtol(str, NULL, 0); | ||
1344 | return 1; | ||
1345 | } | ||
1346 | |||
1347 | __setup("max_loop=", max_loop_setup); | ||
1348 | #endif | ||