diff options
Diffstat (limited to 'drivers/block/rd.c')
-rw-r--r-- | drivers/block/rd.c | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/drivers/block/rd.c b/drivers/block/rd.c new file mode 100644 index 000000000000..145c1fbffe01 --- /dev/null +++ b/drivers/block/rd.c | |||
@@ -0,0 +1,515 @@ | |||
1 | /* | ||
2 | * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. | ||
3 | * | ||
4 | * (C) Chad Page, Theodore Ts'o, et. al, 1995. | ||
5 | * | ||
6 | * This RAM disk is designed to have filesystems created on it and mounted | ||
7 | * just like a regular floppy disk. | ||
8 | * | ||
9 | * It also does something suggested by Linus: use the buffer cache as the | ||
10 | * RAM disk data. This makes it possible to dynamically allocate the RAM disk | ||
11 | * buffer - with some consequences I have to deal with as I write this. | ||
12 | * | ||
13 | * This code is based on the original ramdisk.c, written mostly by | ||
14 | * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by | ||
15 | * Chad Page to use the buffer cache to store the RAM disk data in | ||
16 | * 1995; Theodore then took over the driver again, and cleaned it up | ||
17 | * for inclusion in the mainline kernel. | ||
18 | * | ||
19 | * The original CRAMDISK code was written by Richard Lyons, and | ||
20 | * adapted by Chad Page to use the new RAM disk interface. Theodore | ||
21 | * Ts'o rewrote it so that both the compressed RAM disk loader and the | ||
22 | * kernel decompressor use the same inflate.c codebase. The RAM disk | |
23 | * loader now also loads into a dynamic (buffer cache based) RAM disk, | ||
24 | * not the old static RAM disk. Support for the old static RAM disk has | ||
25 | * been completely removed. | ||
26 | * | ||
27 | * Loadable module support added by Tom Dyas. | ||
28 | * | ||
29 | * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): | ||
30 | * Cosmetic changes in #ifdef MODULE, code movement, etc. | ||
31 | * When the RAM disk module is removed, free the protected buffers | ||
32 | * Default RAM disk size changed to 2.88 MB | ||
33 | * | ||
34 | * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 | ||
35 | * | ||
36 | * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) | ||
37 | * - Chad Page | ||
38 | * | ||
39 | * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 | ||
40 | * | ||
41 | * Make block size and block size shift for RAM disks a global macro | ||
42 | * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 | ||
43 | */ | ||
44 | |||
45 | #include <linux/config.h> | ||
46 | #include <linux/string.h> | ||
47 | #include <linux/slab.h> | ||
48 | #include <asm/atomic.h> | ||
49 | #include <linux/bio.h> | ||
50 | #include <linux/module.h> | ||
51 | #include <linux/moduleparam.h> | ||
52 | #include <linux/init.h> | ||
53 | #include <linux/devfs_fs_kernel.h> | ||
54 | #include <linux/pagemap.h> | ||
55 | #include <linux/blkdev.h> | ||
56 | #include <linux/genhd.h> | ||
57 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | ||
58 | #include <linux/backing-dev.h> | ||
59 | #include <linux/blkpg.h> | ||
60 | #include <linux/writeback.h> | ||
61 | |||
62 | #include <asm/uaccess.h> | ||
63 | |||
64 | /* Various static variables go here. Most are used only in the RAM disk code. | ||
65 | */ | ||
66 | |||
/* One gendisk per RAM disk; allocated with alloc_disk(1) in rd_init(). */
67 | static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; | |
/* Set by the first rd_open() of a unit; cleared and put in rd_cleanup(). */
68 | static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ | |
/* One request queue per RAM disk; its make_request hook is rd_make_request(). */
69 | static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; | |
70 | |||
71 | /* | ||
72 | * Parameters for the boot-loading of the RAM disk. These are set by | ||
73 | * init/main.c (from arguments to the kernel command line) or from the | ||
74 | * architecture-specific setup routine (from the stored boot sector | ||
75 | * information). | ||
76 | */ | ||
/*
 * Given in kB: rd_init() passes rd_size * 2 to set_capacity().  Settable
 * through "ramdisk=", "ramdisk_size=" or the rd_size module parameter.
 */
77 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ | |
78 | /* | ||
79 | * It would be very desirable to have a soft-blocksize (that in the case | ||
80 | * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because | ||
81 | * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of | ||
82 | * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages | ||
83 | * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only | ||
84 | * 1 page will be protected. Depending on the size of the ramdisk you | ||
85 | * may want to change the ramdisk blocksize to achieve a better or worse MM | ||
86 | * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that | ||
87 | * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). | ||
88 | */ | ||
/*
 * rd_init() resets this to BLOCK_SIZE unless it is a power of two in the
 * range 512..PAGE_SIZE, then uses it as the queue's hard sector size.
 */
89 | static int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */ | |
90 | |||
91 | /* | ||
92 | * Copyright (C) 2000 Linus Torvalds. | ||
93 | * 2000 Transmeta Corp. | ||
94 | * aops copied from ramfs. | ||
95 | */ | ||
96 | |||
97 | /* | ||
98 | * If a ramdisk page has buffers, some may be uptodate and some may be not. | ||
99 | * To bring the page uptodate we zero out the non-uptodate buffers. The | ||
100 | * page must be locked. | ||
101 | */ | ||
102 | static void make_page_uptodate(struct page *page) | ||
103 | { | ||
104 | if (page_has_buffers(page)) { | ||
105 | struct buffer_head *bh = page_buffers(page); | ||
106 | struct buffer_head *head = bh; | ||
107 | |||
108 | do { | ||
109 | if (!buffer_uptodate(bh)) { | ||
110 | memset(bh->b_data, 0, bh->b_size); | ||
111 | /* | ||
112 | * akpm: I'm totally undecided about this. The | ||
113 | * buffer has just been magically brought "up to | ||
114 | * date", but nobody should want to be reading | ||
115 | * it anyway, because it hasn't been used for | ||
116 | * anything yet. It is still in a "not read | ||
117 | * from disk yet" state. | ||
118 | * | ||
119 | * But non-uptodate buffers against an uptodate | ||
120 | * page are against the rules. So do it anyway. | ||
121 | */ | ||
122 | set_buffer_uptodate(bh); | ||
123 | } | ||
124 | } while ((bh = bh->b_this_page) != head); | ||
125 | } else { | ||
126 | memset(page_address(page), 0, PAGE_CACHE_SIZE); | ||
127 | } | ||
128 | flush_dcache_page(page); | ||
129 | SetPageUptodate(page); | ||
130 | } | ||
131 | |||
/*
 * ->readpage: there is no backing store to read from, so a page that is
 * not yet uptodate is simply zero-filled via make_page_uptodate().  The
 * page is unlocked before returning.  Always returns 0.
 */
static int ramdisk_readpage(struct file *file, struct page *page)
{
	const int stale = !PageUptodate(page);

	if (stale)
		make_page_uptodate(page);

	unlock_page(page);
	return 0;
}
139 | |||
/*
 * ->prepare_write: make sure the whole page is uptodate before the caller
 * writes into it.  @offset and @to are not used here.  Always returns 0.
 */
static int ramdisk_prepare_write(struct file *file, struct page *page,
				 unsigned offset, unsigned to)
{
	const int stale = !PageUptodate(page);

	if (stale)
		make_page_uptodate(page);

	return 0;
}
147 | |||
/*
 * ->commit_write: the data is already in the page, so all that is left is
 * to mark the page dirty.  @offset and @to are not used here.  Always
 * returns 0.
 */
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	int status = 0;

	set_page_dirty(page);
	return status;
}
154 | |||
155 | /* | ||
156 | * ->writepage to the the blockdev's mapping has to redirty the page so that the | ||
157 | * VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM | ||
158 | * won't try to (pointlessly) write the page again for a while. | ||
159 | * | ||
160 | * Really, these pages should not be on the LRU at all. | ||
161 | */ | ||
162 | static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) | ||
163 | { | ||
164 | if (!PageUptodate(page)) | ||
165 | make_page_uptodate(page); | ||
166 | SetPageDirty(page); | ||
167 | if (wbc->for_reclaim) | ||
168 | return WRITEPAGE_ACTIVATE; | ||
169 | unlock_page(page); | ||
170 | return 0; | ||
171 | } | ||
172 | |||
/*
 * A small speedup: the ramdisk blockdev inode has no backing store to
 * write back to, so attempts to do so are short-circuited here by
 * reporting success without touching any page.
 */
static int ramdisk_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	/* Nothing to write back; neither argument is consulted. */
	return 0;
}
182 | |||
/*
 * Private ->set_page_dirty() for ramdisk blockdev pages: only the page's
 * dirty flag is set, because these pages should not contribute to dirty
 * memory accounting.  Always returns 0.
 */
static int ramdisk_set_page_dirty(struct page *page)
{
	int status = 0;

	SetPageDirty(page);
	return status;
}
192 | |||
193 | static struct address_space_operations ramdisk_aops = { | ||
194 | .readpage = ramdisk_readpage, | ||
195 | .prepare_write = ramdisk_prepare_write, | ||
196 | .commit_write = ramdisk_commit_write, | ||
197 | .writepage = ramdisk_writepage, | ||
198 | .set_page_dirty = ramdisk_set_page_dirty, | ||
199 | .writepages = ramdisk_writepages, | ||
200 | }; | ||
201 | |||
202 | static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, | ||
203 | struct address_space *mapping) | ||
204 | { | ||
205 | pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); | ||
206 | unsigned int vec_offset = vec->bv_offset; | ||
207 | int offset = (sector << 9) & ~PAGE_CACHE_MASK; | ||
208 | int size = vec->bv_len; | ||
209 | int err = 0; | ||
210 | |||
211 | do { | ||
212 | int count; | ||
213 | struct page *page; | ||
214 | char *src; | ||
215 | char *dst; | ||
216 | |||
217 | count = PAGE_CACHE_SIZE - offset; | ||
218 | if (count > size) | ||
219 | count = size; | ||
220 | size -= count; | ||
221 | |||
222 | page = grab_cache_page(mapping, index); | ||
223 | if (!page) { | ||
224 | err = -ENOMEM; | ||
225 | goto out; | ||
226 | } | ||
227 | |||
228 | if (!PageUptodate(page)) | ||
229 | make_page_uptodate(page); | ||
230 | |||
231 | index++; | ||
232 | |||
233 | if (rw == READ) { | ||
234 | src = kmap_atomic(page, KM_USER0) + offset; | ||
235 | dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; | ||
236 | } else { | ||
237 | src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; | ||
238 | dst = kmap_atomic(page, KM_USER1) + offset; | ||
239 | } | ||
240 | offset = 0; | ||
241 | vec_offset += count; | ||
242 | |||
243 | memcpy(dst, src, count); | ||
244 | |||
245 | kunmap_atomic(src, KM_USER0); | ||
246 | kunmap_atomic(dst, KM_USER1); | ||
247 | |||
248 | if (rw == READ) | ||
249 | flush_dcache_page(vec->bv_page); | ||
250 | else | ||
251 | set_page_dirty(page); | ||
252 | unlock_page(page); | ||
253 | put_page(page); | ||
254 | } while (size); | ||
255 | |||
256 | out: | ||
257 | return err; | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Basically, my strategy here is to set up a buffer-head which can't be | ||
262 | * deleted, and make that my Ramdisk. If the request is outside of the | ||
263 | * allocated size, we must get rid of it... | ||
264 | * | ||
265 | * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support | ||
266 | * | ||
267 | */ | ||
268 | static int rd_make_request(request_queue_t *q, struct bio *bio) | ||
269 | { | ||
270 | struct block_device *bdev = bio->bi_bdev; | ||
271 | struct address_space * mapping = bdev->bd_inode->i_mapping; | ||
272 | sector_t sector = bio->bi_sector; | ||
273 | unsigned long len = bio->bi_size >> 9; | ||
274 | int rw = bio_data_dir(bio); | ||
275 | struct bio_vec *bvec; | ||
276 | int ret = 0, i; | ||
277 | |||
278 | if (sector + len > get_capacity(bdev->bd_disk)) | ||
279 | goto fail; | ||
280 | |||
281 | if (rw==READA) | ||
282 | rw=READ; | ||
283 | |||
284 | bio_for_each_segment(bvec, bio, i) { | ||
285 | ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); | ||
286 | sector += bvec->bv_len >> 9; | ||
287 | } | ||
288 | if (ret) | ||
289 | goto fail; | ||
290 | |||
291 | bio_endio(bio, bio->bi_size, 0); | ||
292 | return 0; | ||
293 | fail: | ||
294 | bio_io_error(bio, bio->bi_size); | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | static int rd_ioctl(struct inode *inode, struct file *file, | ||
299 | unsigned int cmd, unsigned long arg) | ||
300 | { | ||
301 | int error; | ||
302 | struct block_device *bdev = inode->i_bdev; | ||
303 | |||
304 | if (cmd != BLKFLSBUF) | ||
305 | return -ENOTTY; | ||
306 | |||
307 | /* | ||
308 | * special: we want to release the ramdisk memory, it's not like with | ||
309 | * the other blockdevices where this ioctl only flushes away the buffer | ||
310 | * cache | ||
311 | */ | ||
312 | error = -EBUSY; | ||
313 | down(&bdev->bd_sem); | ||
314 | if (bdev->bd_openers <= 2) { | ||
315 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | ||
316 | error = 0; | ||
317 | } | ||
318 | up(&bdev->bd_sem); | ||
319 | return error; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * This is the backing_dev_info for the blockdev inode itself. It doesn't need | ||
324 | * writeback and it does not contribute to dirty memory accounting. | ||
325 | */ | ||
326 | static struct backing_dev_info rd_backing_dev_info = { | ||
327 | .ra_pages = 0, /* No readahead */ | ||
328 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, | ||
329 | .unplug_io_fn = default_unplug_io_fn, | ||
330 | }; | ||
331 | |||
332 | /* | ||
333 | * This is the backing_dev_info for the files which live atop the ramdisk | ||
334 | * "device". These files do need writeback and they do contribute to dirty | ||
335 | * memory accounting. | ||
336 | */ | ||
337 | static struct backing_dev_info rd_file_backing_dev_info = { | ||
338 | .ra_pages = 0, /* No readahead */ | ||
339 | .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ | ||
340 | .unplug_io_fn = default_unplug_io_fn, | ||
341 | }; | ||
342 | |||
343 | static int rd_open(struct inode *inode, struct file *filp) | ||
344 | { | ||
345 | unsigned unit = iminor(inode); | ||
346 | |||
347 | if (rd_bdev[unit] == NULL) { | ||
348 | struct block_device *bdev = inode->i_bdev; | ||
349 | struct address_space *mapping; | ||
350 | unsigned bsize; | ||
351 | int gfp_mask; | ||
352 | |||
353 | inode = igrab(bdev->bd_inode); | ||
354 | rd_bdev[unit] = bdev; | ||
355 | bdev->bd_openers++; | ||
356 | bsize = bdev_hardsect_size(bdev); | ||
357 | bdev->bd_block_size = bsize; | ||
358 | inode->i_blkbits = blksize_bits(bsize); | ||
359 | inode->i_size = get_capacity(bdev->bd_disk)<<9; | ||
360 | |||
361 | mapping = inode->i_mapping; | ||
362 | mapping->a_ops = &ramdisk_aops; | ||
363 | mapping->backing_dev_info = &rd_backing_dev_info; | ||
364 | bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; | ||
365 | |||
366 | /* | ||
367 | * Deep badness. rd_blkdev_pagecache_IO() needs to allocate | ||
368 | * pagecache pages within a request_fn. We cannot recur back | ||
369 | * into the filesytem which is mounted atop the ramdisk, because | ||
370 | * that would deadlock on fs locks. And we really don't want | ||
371 | * to reenter rd_blkdev_pagecache_IO when we're already within | ||
372 | * that function. | ||
373 | * | ||
374 | * So we turn off __GFP_FS and __GFP_IO. | ||
375 | * | ||
376 | * And to give this thing a hope of working, turn on __GFP_HIGH. | ||
377 | * Hopefully, there's enough regular memory allocation going on | ||
378 | * for the page allocator emergency pools to keep the ramdisk | ||
379 | * driver happy. | ||
380 | */ | ||
381 | gfp_mask = mapping_gfp_mask(mapping); | ||
382 | gfp_mask &= ~(__GFP_FS|__GFP_IO); | ||
383 | gfp_mask |= __GFP_HIGH; | ||
384 | mapping_set_gfp_mask(mapping, gfp_mask); | ||
385 | } | ||
386 | |||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static struct block_device_operations rd_bd_op = { | ||
391 | .owner = THIS_MODULE, | ||
392 | .open = rd_open, | ||
393 | .ioctl = rd_ioctl, | ||
394 | }; | ||
395 | |||
396 | /* | ||
397 | * Before freeing the module, invalidate all of the protected buffers! | ||
398 | */ | ||
399 | static void __exit rd_cleanup(void) | ||
400 | { | ||
401 | int i; | ||
402 | |||
403 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | ||
404 | struct block_device *bdev = rd_bdev[i]; | ||
405 | rd_bdev[i] = NULL; | ||
406 | if (bdev) { | ||
407 | invalidate_bdev(bdev, 1); | ||
408 | blkdev_put(bdev); | ||
409 | } | ||
410 | del_gendisk(rd_disks[i]); | ||
411 | put_disk(rd_disks[i]); | ||
412 | blk_cleanup_queue(rd_queue[i]); | ||
413 | } | ||
414 | devfs_remove("rd"); | ||
415 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * This is the registration and initialization section of the RAM disk driver | ||
420 | */ | ||
421 | static int __init rd_init(void) | ||
422 | { | ||
423 | int i; | ||
424 | int err = -ENOMEM; | ||
425 | |||
426 | if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || | ||
427 | (rd_blocksize & (rd_blocksize-1))) { | ||
428 | printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", | ||
429 | rd_blocksize); | ||
430 | rd_blocksize = BLOCK_SIZE; | ||
431 | } | ||
432 | |||
433 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | ||
434 | rd_disks[i] = alloc_disk(1); | ||
435 | if (!rd_disks[i]) | ||
436 | goto out; | ||
437 | } | ||
438 | |||
439 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { | ||
440 | err = -EIO; | ||
441 | goto out; | ||
442 | } | ||
443 | |||
444 | devfs_mk_dir("rd"); | ||
445 | |||
446 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | ||
447 | struct gendisk *disk = rd_disks[i]; | ||
448 | |||
449 | rd_queue[i] = blk_alloc_queue(GFP_KERNEL); | ||
450 | if (!rd_queue[i]) | ||
451 | goto out_queue; | ||
452 | |||
453 | blk_queue_make_request(rd_queue[i], &rd_make_request); | ||
454 | blk_queue_hardsect_size(rd_queue[i], rd_blocksize); | ||
455 | |||
456 | /* rd_size is given in kB */ | ||
457 | disk->major = RAMDISK_MAJOR; | ||
458 | disk->first_minor = i; | ||
459 | disk->fops = &rd_bd_op; | ||
460 | disk->queue = rd_queue[i]; | ||
461 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | ||
462 | sprintf(disk->disk_name, "ram%d", i); | ||
463 | sprintf(disk->devfs_name, "rd/%d", i); | ||
464 | set_capacity(disk, rd_size * 2); | ||
465 | add_disk(rd_disks[i]); | ||
466 | } | ||
467 | |||
468 | /* rd_size is given in kB */ | ||
469 | printk("RAMDISK driver initialized: " | ||
470 | "%d RAM disks of %dK size %d blocksize\n", | ||
471 | CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); | ||
472 | |||
473 | return 0; | ||
474 | out_queue: | ||
475 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | ||
476 | out: | ||
477 | while (i--) { | ||
478 | put_disk(rd_disks[i]); | ||
479 | blk_cleanup_queue(rd_queue[i]); | ||
480 | } | ||
481 | return err; | ||
482 | } | ||
483 | |||
/* Driver entry point (rd_init) and exit point (rd_cleanup). */
484 | module_init(rd_init); | |
485 | module_exit(rd_cleanup); | |
486 | |||
487 | /* options - nonmodular */ | ||
488 | #ifndef MODULE | ||
489 | static int __init ramdisk_size(char *str) | ||
490 | { | ||
491 | rd_size = simple_strtol(str,NULL,0); | ||
492 | return 1; | ||
493 | } | ||
494 | static int __init ramdisk_size2(char *str) /* kludge */ | ||
495 | { | ||
496 | return ramdisk_size(str); | ||
497 | } | ||
498 | static int __init ramdisk_blocksize(char *str) | ||
499 | { | ||
500 | rd_blocksize = simple_strtol(str,NULL,0); | ||
501 | return 1; | ||
502 | } | ||
/*
 * Boot-time options: "ramdisk=" and "ramdisk_size=" both set rd_size;
 * "ramdisk_blocksize=" sets rd_blocksize.
 */
503 | __setup("ramdisk=", ramdisk_size); | |
504 | __setup("ramdisk_size=", ramdisk_size2); | |
505 | __setup("ramdisk_blocksize=", ramdisk_blocksize); | |
506 | #endif | ||
507 | |||
508 | /* options - modular */ | ||
/* Both parameters are registered with permissions 0. */
509 | module_param(rd_size, int, 0); | |
510 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | |
511 | module_param(rd_blocksize, int, 0); | |
512 | MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); | |
/* Module alias for the RAMDISK_MAJOR block major. */
513 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
514 | |||
515 | MODULE_LICENSE("GPL"); | |