Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. | |
3 | * | |
4 | * (C) Chad Page, Theodore Ts'o, et. al, 1995. | |
5 | * | |
6 | * This RAM disk is designed to have filesystems created on it and mounted | |
7 | * just like a regular floppy disk. | |
8 | * | |
9 | * It also does something suggested by Linus: use the buffer cache as the | |
10 | * RAM disk data. This makes it possible to dynamically allocate the RAM disk | |
11 | * buffer - with some consequences I have to deal with as I write this. | |
12 | * | |
13 | * This code is based on the original ramdisk.c, written mostly by | |
14 | * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by | |
15 | * Chad Page to use the buffer cache to store the RAM disk data in | |
16 | * 1995; Theodore then took over the driver again, and cleaned it up | |
17 | * for inclusion in the mainline kernel. | |
18 | * | |
19 | * The original CRAMDISK code was written by Richard Lyons, and | |
20 | * adapted by Chad Page to use the new RAM disk interface. Theodore | |
21 | * Ts'o rewrote it so that both the compressed RAM disk loader and the | |
22 | * kernel decompressor use the same inflate.c codebase. The RAM disk | |
23 | * loader now also loads into a dynamic (buffer cache based) RAM disk, | |
24 | * not the old static RAM disk. Support for the old static RAM disk has | |
25 | * been completely removed. | |
26 | * | |
27 | * Loadable module support added by Tom Dyas. | |
28 | * | |
29 | * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): | |
30 | * Cosmetic changes in #ifdef MODULE, code movement, etc. | |
31 | * When the RAM disk module is removed, free the protected buffers | |
32 | * Default RAM disk size changed to 2.88 MB | |
33 | * | |
34 | * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 | |
35 | * | |
36 | * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) | |
37 | * - Chad Page | |
38 | * | |
39 | * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 | |
40 | * | |
41 | * Make block size and block size shift for RAM disks a global macro | |
42 | * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 | |
43 | */ | |
44 | ||
45 | #include <linux/config.h> | |
46 | #include <linux/string.h> | |
47 | #include <linux/slab.h> | |
48 | #include <asm/atomic.h> | |
49 | #include <linux/bio.h> | |
50 | #include <linux/module.h> | |
51 | #include <linux/moduleparam.h> | |
52 | #include <linux/init.h> | |
53 | #include <linux/devfs_fs_kernel.h> | |
54 | #include <linux/pagemap.h> | |
55 | #include <linux/blkdev.h> | |
56 | #include <linux/genhd.h> | |
57 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | |
58 | #include <linux/backing-dev.h> | |
59 | #include <linux/blkpg.h> | |
60 | #include <linux/writeback.h> | |
61 | ||
62 | #include <asm/uaccess.h> | |
63 | ||
64 | /* Various static variables go here. Most are used only in the RAM disk code. | |
65 | */ | |
66 | ||
67 | static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; | |
68 | static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ | |
69 | static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; | |
70 | ||
71 | /* | |
72 | * Parameters for the boot-loading of the RAM disk. These are set by | |
73 | * init/main.c (from arguments to the kernel command line) or from the | |
74 | * architecture-specific setup routine (from the stored boot sector | |
75 | * information). | |
76 | */ | |
cccf2508 | 77 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ |
1da177e4 LT |
78 | /* |
79 | * It would be very desirable to have a soft-blocksize (that in the case | |
80 | * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because | |
81 | * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of | |
82 | * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages | |
83 | * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only | |
84 | * 1 page will be protected. Depending on the size of the ramdisk you | |
85 | * may want to change the ramdisk blocksize to achieve a better or worse MM | |
86 | * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that | |
87 | * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). | |
88 | */ | |
89 | static int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */ | |
90 | ||
91 | /* | |
92 | * Copyright (C) 2000 Linus Torvalds. | |
93 | * 2000 Transmeta Corp. | |
94 | * aops copied from ramfs. | |
95 | */ | |
96 | ||
97 | /* | |
98 | * If a ramdisk page has buffers, some may be uptodate and some may be not. | |
99 | * To bring the page uptodate we zero out the non-uptodate buffers. The | |
100 | * page must be locked. | |
101 | */ | |
102 | static void make_page_uptodate(struct page *page) | |
103 | { | |
104 | if (page_has_buffers(page)) { | |
105 | struct buffer_head *bh = page_buffers(page); | |
106 | struct buffer_head *head = bh; | |
107 | ||
108 | do { | |
109 | if (!buffer_uptodate(bh)) { | |
110 | memset(bh->b_data, 0, bh->b_size); | |
111 | /* | |
112 | * akpm: I'm totally undecided about this. The | |
113 | * buffer has just been magically brought "up to | |
114 | * date", but nobody should want to be reading | |
115 | * it anyway, because it hasn't been used for | |
116 | * anything yet. It is still in a "not read | |
117 | * from disk yet" state. | |
118 | * | |
119 | * But non-uptodate buffers against an uptodate | |
120 | * page are against the rules. So do it anyway. | |
121 | */ | |
122 | set_buffer_uptodate(bh); | |
123 | } | |
124 | } while ((bh = bh->b_this_page) != head); | |
125 | } else { | |
126 | memset(page_address(page), 0, PAGE_CACHE_SIZE); | |
127 | } | |
128 | flush_dcache_page(page); | |
129 | SetPageUptodate(page); | |
130 | } | |
131 | ||
/*
 * ->readpage: there is nothing to read from, so a page that is not yet
 * uptodate is zero-filled via make_page_uptodate().  The page is unlocked
 * before returning.  Always returns 0.
 */
static int ramdisk_readpage(struct file *file, struct page *page)
{
	int stale = !PageUptodate(page);

	if (stale)
		make_page_uptodate(page);

	unlock_page(page);
	return 0;
}
139 | ||
/*
 * ->prepare_write: make sure the page is uptodate before the caller
 * modifies it.  @offset and @to are not used.  Always returns 0.
 */
static int ramdisk_prepare_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	if (PageUptodate(page))
		return 0;

	make_page_uptodate(page);
	return 0;
}
147 | ||
/*
 * ->commit_write: the data already lives in the page, so all that is left
 * to do is mark the page dirty.  @offset and @to are not used.
 * Always returns 0.
 */
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	set_page_dirty(page);

	return 0;
}
154 | ||
155 | /* | |
156 | * ->writepage to the blockdev's mapping has to redirty the page so that the | |
994fc28c | 157 | * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM |
1da177e4 LT |
158 | * won't try to (pointlessly) write the page again for a while. |
159 | * | |
160 | * Really, these pages should not be on the LRU at all. | |
161 | */ | |
162 | static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) | |
163 | { | |
164 | if (!PageUptodate(page)) | |
165 | make_page_uptodate(page); | |
166 | SetPageDirty(page); | |
167 | if (wbc->for_reclaim) | |
994fc28c | 168 | return AOP_WRITEPAGE_ACTIVATE; |
1da177e4 LT |
169 | unlock_page(page); |
170 | return 0; | |
171 | } | |
172 | ||
173 | /* | |
174 | * This is a little speedup thing: short-circuit attempts to write back the | |
175 | * ramdisk blockdev inode to its non-existent backing store. | |
176 | */ | |
static int ramdisk_writepages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	/* Neither argument is examined; success is reported unconditionally. */
	return 0;
}
182 | ||
183 | /* | |
184 | * ramdisk blockdev pages have their own ->set_page_dirty() because we don't | |
185 | * want them to contribute to dirty memory accounting. | |
186 | */ | |
static int ramdisk_set_page_dirty(struct page *page)
{
	/* Only the page flag is set; nothing else is updated here. */
	SetPageDirty(page);

	return 0;
}
192 | ||
193 | static struct address_space_operations ramdisk_aops = { | |
194 | .readpage = ramdisk_readpage, | |
195 | .prepare_write = ramdisk_prepare_write, | |
196 | .commit_write = ramdisk_commit_write, | |
197 | .writepage = ramdisk_writepage, | |
198 | .set_page_dirty = ramdisk_set_page_dirty, | |
199 | .writepages = ramdisk_writepages, | |
200 | }; | |
201 | ||
202 | static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, | |
203 | struct address_space *mapping) | |
204 | { | |
205 | pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); | |
206 | unsigned int vec_offset = vec->bv_offset; | |
207 | int offset = (sector << 9) & ~PAGE_CACHE_MASK; | |
208 | int size = vec->bv_len; | |
209 | int err = 0; | |
210 | ||
211 | do { | |
212 | int count; | |
213 | struct page *page; | |
214 | char *src; | |
215 | char *dst; | |
216 | ||
217 | count = PAGE_CACHE_SIZE - offset; | |
218 | if (count > size) | |
219 | count = size; | |
220 | size -= count; | |
221 | ||
222 | page = grab_cache_page(mapping, index); | |
223 | if (!page) { | |
224 | err = -ENOMEM; | |
225 | goto out; | |
226 | } | |
227 | ||
228 | if (!PageUptodate(page)) | |
229 | make_page_uptodate(page); | |
230 | ||
231 | index++; | |
232 | ||
233 | if (rw == READ) { | |
234 | src = kmap_atomic(page, KM_USER0) + offset; | |
235 | dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; | |
236 | } else { | |
237 | src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; | |
238 | dst = kmap_atomic(page, KM_USER1) + offset; | |
239 | } | |
240 | offset = 0; | |
241 | vec_offset += count; | |
242 | ||
243 | memcpy(dst, src, count); | |
244 | ||
245 | kunmap_atomic(src, KM_USER0); | |
246 | kunmap_atomic(dst, KM_USER1); | |
247 | ||
248 | if (rw == READ) | |
249 | flush_dcache_page(vec->bv_page); | |
250 | else | |
251 | set_page_dirty(page); | |
252 | unlock_page(page); | |
253 | put_page(page); | |
254 | } while (size); | |
255 | ||
256 | out: | |
257 | return err; | |
258 | } | |
259 | ||
260 | /* | |
261 | * Basically, my strategy here is to set up a buffer-head which can't be | |
262 | * deleted, and make that my Ramdisk. If the request is outside of the | |
263 | * allocated size, we must get rid of it... | |
264 | * | |
265 | * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support | |
266 | * | |
267 | */ | |
268 | static int rd_make_request(request_queue_t *q, struct bio *bio) | |
269 | { | |
270 | struct block_device *bdev = bio->bi_bdev; | |
271 | struct address_space * mapping = bdev->bd_inode->i_mapping; | |
272 | sector_t sector = bio->bi_sector; | |
273 | unsigned long len = bio->bi_size >> 9; | |
274 | int rw = bio_data_dir(bio); | |
275 | struct bio_vec *bvec; | |
276 | int ret = 0, i; | |
277 | ||
278 | if (sector + len > get_capacity(bdev->bd_disk)) | |
279 | goto fail; | |
280 | ||
281 | if (rw==READA) | |
282 | rw=READ; | |
283 | ||
284 | bio_for_each_segment(bvec, bio, i) { | |
285 | ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); | |
286 | sector += bvec->bv_len >> 9; | |
287 | } | |
288 | if (ret) | |
289 | goto fail; | |
290 | ||
291 | bio_endio(bio, bio->bi_size, 0); | |
292 | return 0; | |
293 | fail: | |
294 | bio_io_error(bio, bio->bi_size); | |
295 | return 0; | |
296 | } | |
297 | ||
298 | static int rd_ioctl(struct inode *inode, struct file *file, | |
299 | unsigned int cmd, unsigned long arg) | |
300 | { | |
301 | int error; | |
302 | struct block_device *bdev = inode->i_bdev; | |
303 | ||
304 | if (cmd != BLKFLSBUF) | |
305 | return -ENOTTY; | |
306 | ||
307 | /* | |
308 | * special: we want to release the ramdisk memory, it's not like with | |
309 | * the other blockdevices where this ioctl only flushes away the buffer | |
310 | * cache | |
311 | */ | |
312 | error = -EBUSY; | |
313 | down(&bdev->bd_sem); | |
314 | if (bdev->bd_openers <= 2) { | |
315 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | |
316 | error = 0; | |
317 | } | |
318 | up(&bdev->bd_sem); | |
319 | return error; | |
320 | } | |
321 | ||
322 | /* | |
323 | * This is the backing_dev_info for the blockdev inode itself. It doesn't need | |
324 | * writeback and it does not contribute to dirty memory accounting. | |
325 | */ | |
326 | static struct backing_dev_info rd_backing_dev_info = { | |
327 | .ra_pages = 0, /* No readahead */ | |
328 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, | |
329 | .unplug_io_fn = default_unplug_io_fn, | |
330 | }; | |
331 | ||
332 | /* | |
333 | * This is the backing_dev_info for the files which live atop the ramdisk | |
334 | * "device". These files do need writeback and they do contribute to dirty | |
335 | * memory accounting. | |
336 | */ | |
337 | static struct backing_dev_info rd_file_backing_dev_info = { | |
338 | .ra_pages = 0, /* No readahead */ | |
339 | .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ | |
340 | .unplug_io_fn = default_unplug_io_fn, | |
341 | }; | |
342 | ||
343 | static int rd_open(struct inode *inode, struct file *filp) | |
344 | { | |
345 | unsigned unit = iminor(inode); | |
346 | ||
347 | if (rd_bdev[unit] == NULL) { | |
348 | struct block_device *bdev = inode->i_bdev; | |
349 | struct address_space *mapping; | |
350 | unsigned bsize; | |
b4e3ca1a | 351 | gfp_t gfp_mask; |
1da177e4 LT |
352 | |
353 | inode = igrab(bdev->bd_inode); | |
354 | rd_bdev[unit] = bdev; | |
355 | bdev->bd_openers++; | |
356 | bsize = bdev_hardsect_size(bdev); | |
357 | bdev->bd_block_size = bsize; | |
358 | inode->i_blkbits = blksize_bits(bsize); | |
359 | inode->i_size = get_capacity(bdev->bd_disk)<<9; | |
360 | ||
361 | mapping = inode->i_mapping; | |
362 | mapping->a_ops = &ramdisk_aops; | |
363 | mapping->backing_dev_info = &rd_backing_dev_info; | |
364 | bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; | |
365 | ||
366 | /* | |
367 | * Deep badness. rd_blkdev_pagecache_IO() needs to allocate | |
368 | * pagecache pages within a request_fn. We cannot recur back | |
369 | * into the filesytem which is mounted atop the ramdisk, because | |
370 | * that would deadlock on fs locks. And we really don't want | |
371 | * to reenter rd_blkdev_pagecache_IO when we're already within | |
372 | * that function. | |
373 | * | |
374 | * So we turn off __GFP_FS and __GFP_IO. | |
375 | * | |
376 | * And to give this thing a hope of working, turn on __GFP_HIGH. | |
377 | * Hopefully, there's enough regular memory allocation going on | |
378 | * for the page allocator emergency pools to keep the ramdisk | |
379 | * driver happy. | |
380 | */ | |
381 | gfp_mask = mapping_gfp_mask(mapping); | |
382 | gfp_mask &= ~(__GFP_FS|__GFP_IO); | |
383 | gfp_mask |= __GFP_HIGH; | |
384 | mapping_set_gfp_mask(mapping, gfp_mask); | |
385 | } | |
386 | ||
387 | return 0; | |
388 | } | |
389 | ||
390 | static struct block_device_operations rd_bd_op = { | |
391 | .owner = THIS_MODULE, | |
392 | .open = rd_open, | |
393 | .ioctl = rd_ioctl, | |
394 | }; | |
395 | ||
396 | /* | |
397 | * Before freeing the module, invalidate all of the protected buffers! | |
398 | */ | |
399 | static void __exit rd_cleanup(void) | |
400 | { | |
401 | int i; | |
402 | ||
403 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
404 | struct block_device *bdev = rd_bdev[i]; | |
405 | rd_bdev[i] = NULL; | |
406 | if (bdev) { | |
407 | invalidate_bdev(bdev, 1); | |
408 | blkdev_put(bdev); | |
409 | } | |
410 | del_gendisk(rd_disks[i]); | |
411 | put_disk(rd_disks[i]); | |
412 | blk_cleanup_queue(rd_queue[i]); | |
413 | } | |
414 | devfs_remove("rd"); | |
415 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | |
416 | } | |
417 | ||
418 | /* | |
419 | * This is the registration and initialization section of the RAM disk driver | |
420 | */ | |
421 | static int __init rd_init(void) | |
422 | { | |
423 | int i; | |
424 | int err = -ENOMEM; | |
425 | ||
426 | if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || | |
427 | (rd_blocksize & (rd_blocksize-1))) { | |
428 | printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", | |
429 | rd_blocksize); | |
430 | rd_blocksize = BLOCK_SIZE; | |
431 | } | |
432 | ||
433 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
434 | rd_disks[i] = alloc_disk(1); | |
435 | if (!rd_disks[i]) | |
436 | goto out; | |
437 | } | |
438 | ||
439 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { | |
440 | err = -EIO; | |
441 | goto out; | |
442 | } | |
443 | ||
444 | devfs_mk_dir("rd"); | |
445 | ||
446 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
447 | struct gendisk *disk = rd_disks[i]; | |
448 | ||
449 | rd_queue[i] = blk_alloc_queue(GFP_KERNEL); | |
450 | if (!rd_queue[i]) | |
451 | goto out_queue; | |
452 | ||
453 | blk_queue_make_request(rd_queue[i], &rd_make_request); | |
454 | blk_queue_hardsect_size(rd_queue[i], rd_blocksize); | |
455 | ||
456 | /* rd_size is given in kB */ | |
457 | disk->major = RAMDISK_MAJOR; | |
458 | disk->first_minor = i; | |
459 | disk->fops = &rd_bd_op; | |
460 | disk->queue = rd_queue[i]; | |
461 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | |
462 | sprintf(disk->disk_name, "ram%d", i); | |
463 | sprintf(disk->devfs_name, "rd/%d", i); | |
464 | set_capacity(disk, rd_size * 2); | |
465 | add_disk(rd_disks[i]); | |
466 | } | |
467 | ||
468 | /* rd_size is given in kB */ | |
469 | printk("RAMDISK driver initialized: " | |
470 | "%d RAM disks of %dK size %d blocksize\n", | |
471 | CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); | |
472 | ||
473 | return 0; | |
474 | out_queue: | |
475 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | |
476 | out: | |
477 | while (i--) { | |
478 | put_disk(rd_disks[i]); | |
479 | blk_cleanup_queue(rd_queue[i]); | |
480 | } | |
481 | return err; | |
482 | } | |
483 | ||
484 | module_init(rd_init); | |
485 | module_exit(rd_cleanup); | |
486 | ||
487 | /* options - nonmodular */ | |
488 | #ifndef MODULE | |
489 | static int __init ramdisk_size(char *str) | |
490 | { | |
491 | rd_size = simple_strtol(str,NULL,0); | |
492 | return 1; | |
493 | } | |
494 | static int __init ramdisk_size2(char *str) /* kludge */ | |
495 | { | |
496 | return ramdisk_size(str); | |
497 | } | |
498 | static int __init ramdisk_blocksize(char *str) | |
499 | { | |
500 | rd_blocksize = simple_strtol(str,NULL,0); | |
501 | return 1; | |
502 | } | |
503 | __setup("ramdisk=", ramdisk_size); | |
504 | __setup("ramdisk_size=", ramdisk_size2); | |
505 | __setup("ramdisk_blocksize=", ramdisk_blocksize); | |
506 | #endif | |
507 | ||
508 | /* options - modular */ | |
509 | module_param(rd_size, int, 0); | |
510 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | |
511 | module_param(rd_blocksize, int, 0); | |
512 | MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); | |
513 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
514 | ||
515 | MODULE_LICENSE("GPL"); |