Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. | |
3 | * | |
4 | * (C) Chad Page, Theodore Ts'o, et. al, 1995. | |
5 | * | |
6 | * This RAM disk is designed to have filesystems created on it and mounted | |
7 | * just like a regular floppy disk. | |
8 | * | |
9 | * It also does something suggested by Linus: use the buffer cache as the | |
10 | * RAM disk data. This makes it possible to dynamically allocate the RAM disk | |
11 | * buffer - with some consequences I have to deal with as I write this. | |
12 | * | |
13 | * This code is based on the original ramdisk.c, written mostly by | |
14 | * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by | |
15 | * Chad Page to use the buffer cache to store the RAM disk data in | |
16 | * 1995; Theodore then took over the driver again, and cleaned it up | |
17 | * for inclusion in the mainline kernel. | |
18 | * | |
19 | * The original CRAMDISK code was written by Richard Lyons, and | |
20 | * adapted by Chad Page to use the new RAM disk interface. Theodore | |
21 | * Ts'o rewrote it so that both the compressed RAM disk loader and the | |
22 | * kernel decompressor use the same inflate.c codebase. The RAM disk | |
23 | * loader now also loads into a dynamic (buffer cache based) RAM disk, | |
24 | * not the old static RAM disk. Support for the old static RAM disk has | |
25 | * been completely removed. | |
26 | * | |
27 | * Loadable module support added by Tom Dyas. | |
28 | * | |
29 | * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): | |
30 | * Cosmetic changes in #ifdef MODULE, code movement, etc. | |
31 | * When the RAM disk module is removed, free the protected buffers | |
32 | * Default RAM disk size changed to 2.88 MB | |
33 | * | |
34 | * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 | |
35 | * | |
36 | * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) | |
37 | * - Chad Page | |
38 | * | |
39 | * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 | |
40 | * | |
41 | * Make block size and block size shift for RAM disks a global macro | |
42 | * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 | |
43 | */ | |
44 | ||
45 | #include <linux/config.h> | |
46 | #include <linux/string.h> | |
47 | #include <linux/slab.h> | |
48 | #include <asm/atomic.h> | |
49 | #include <linux/bio.h> | |
50 | #include <linux/module.h> | |
51 | #include <linux/moduleparam.h> | |
52 | #include <linux/init.h> | |
53 | #include <linux/devfs_fs_kernel.h> | |
54 | #include <linux/pagemap.h> | |
55 | #include <linux/blkdev.h> | |
56 | #include <linux/genhd.h> | |
57 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | |
58 | #include <linux/backing-dev.h> | |
59 | #include <linux/blkpg.h> | |
60 | #include <linux/writeback.h> | |
61 | ||
62 | #include <asm/uaccess.h> | |
63 | ||
64 | /* Various static variables go here. Most are used only in the RAM disk code. | |
65 | */ | |
66 | ||
67 | static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; | |
68 | static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ | |
69 | static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; | |
70 | ||
71 | /* | |
72 | * Parameters for the boot-loading of the RAM disk. These are set by | |
73 | * init/main.c (from arguments to the kernel command line) or from the | |
74 | * architecture-specific setup routine (from the stored boot sector | |
75 | * information). | |
76 | */ | |
cccf2508 | 77 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ |
1da177e4 LT |
78 | /* |
79 | * It would be very desirable to have a soft-blocksize (that in the case | |
80 | * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because | |
81 | * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of | |
82 | * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages | |
83 | * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only | |
84 | * 1 page will be protected. Depending on the size of the ramdisk you | |
85 | * may want to change the ramdisk blocksize to achieve a better or worse MM | |
86 | * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that | |
87 | * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). | |
88 | */ | |
89 | static int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */ | |
90 | ||
91 | /* | |
92 | * Copyright (C) 2000 Linus Torvalds. | |
93 | * 2000 Transmeta Corp. | |
94 | * aops copied from ramfs. | |
95 | */ | |
96 | ||
97 | /* | |
98 | * If a ramdisk page has buffers, some may be uptodate and some may be not. | |
99 | * To bring the page uptodate we zero out the non-uptodate buffers. The | |
100 | * page must be locked. | |
101 | */ | |
102 | static void make_page_uptodate(struct page *page) | |
103 | { | |
104 | if (page_has_buffers(page)) { | |
105 | struct buffer_head *bh = page_buffers(page); | |
106 | struct buffer_head *head = bh; | |
107 | ||
108 | do { | |
109 | if (!buffer_uptodate(bh)) { | |
110 | memset(bh->b_data, 0, bh->b_size); | |
111 | /* | |
112 | * akpm: I'm totally undecided about this. The | |
113 | * buffer has just been magically brought "up to | |
114 | * date", but nobody should want to be reading | |
115 | * it anyway, because it hasn't been used for | |
116 | * anything yet. It is still in a "not read | |
117 | * from disk yet" state. | |
118 | * | |
119 | * But non-uptodate buffers against an uptodate | |
120 | * page are against the rules. So do it anyway. | |
121 | */ | |
122 | set_buffer_uptodate(bh); | |
123 | } | |
124 | } while ((bh = bh->b_this_page) != head); | |
125 | } else { | |
126 | memset(page_address(page), 0, PAGE_CACHE_SIZE); | |
127 | } | |
128 | flush_dcache_page(page); | |
129 | SetPageUptodate(page); | |
130 | } | |
131 | ||
/*
 * ->readpage for the ramdisk mapping.  There is no backing store to read
 * from, so a page that is not yet uptodate is simply zero-filled via
 * make_page_uptodate().  The page is unlocked before returning.
 *
 * Always returns 0.
 */
static int ramdisk_readpage(struct file *file, struct page *page)
{
	const int stale = !PageUptodate(page);

	if (stale)
		make_page_uptodate(page);
	unlock_page(page);
	return 0;
}
139 | ||
/*
 * ->prepare_write for the ramdisk mapping.  Makes sure the page is uptodate
 * (zero-filling it if necessary) before data is written into it.  The
 * offset/to range is not used.  The page lock is left untouched.
 *
 * Always returns 0.
 */
static int ramdisk_prepare_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	if (PageUptodate(page))
		return 0;

	make_page_uptodate(page);
	return 0;
}
147 | ||
/*
 * ->commit_write for the ramdisk mapping.  The only work to do after a write
 * is to mark the page dirty; the offset/to range is not used.
 *
 * Always returns 0.
 */
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	/* The return value of set_page_dirty() is deliberately not propagated. */
	set_page_dirty(page);

	return 0;
}
154 | ||
155 | /* | |
156 | * ->writepage to the the blockdev's mapping has to redirty the page so that the | |
994fc28c | 157 | * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM |
1da177e4 LT |
158 | * won't try to (pointlessly) write the page again for a while. |
159 | * | |
160 | * Really, these pages should not be on the LRU at all. | |
161 | */ | |
162 | static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) | |
163 | { | |
164 | if (!PageUptodate(page)) | |
165 | make_page_uptodate(page); | |
166 | SetPageDirty(page); | |
167 | if (wbc->for_reclaim) | |
994fc28c | 168 | return AOP_WRITEPAGE_ACTIVATE; |
1da177e4 LT |
169 | unlock_page(page); |
170 | return 0; | |
171 | } | |
172 | ||
/*
 * ->writepages for the ramdisk mapping.
 *
 * A small speedup: attempts to write back the ramdisk blockdev inode to its
 * non-existent backing store are short-circuited here.  Neither argument is
 * examined and 0 is returned unconditionally.
 */
static int ramdisk_writepages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	return 0;
}
182 | ||
/*
 * ->set_page_dirty for the ramdisk mapping.
 *
 * ramdisk blockdev pages have their own implementation because we don't want
 * them to contribute to dirty memory accounting: only the page's dirty flag
 * is set here.
 *
 * Returns 1 if the page was not dirty before the call, 0 if it already was.
 */
static int ramdisk_set_page_dirty(struct page *page)
{
	return !TestSetPageDirty(page);
}
193 | ||
194 | static struct address_space_operations ramdisk_aops = { | |
195 | .readpage = ramdisk_readpage, | |
196 | .prepare_write = ramdisk_prepare_write, | |
197 | .commit_write = ramdisk_commit_write, | |
198 | .writepage = ramdisk_writepage, | |
199 | .set_page_dirty = ramdisk_set_page_dirty, | |
200 | .writepages = ramdisk_writepages, | |
201 | }; | |
202 | ||
203 | static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, | |
204 | struct address_space *mapping) | |
205 | { | |
206 | pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); | |
207 | unsigned int vec_offset = vec->bv_offset; | |
208 | int offset = (sector << 9) & ~PAGE_CACHE_MASK; | |
209 | int size = vec->bv_len; | |
210 | int err = 0; | |
211 | ||
212 | do { | |
213 | int count; | |
214 | struct page *page; | |
215 | char *src; | |
216 | char *dst; | |
217 | ||
218 | count = PAGE_CACHE_SIZE - offset; | |
219 | if (count > size) | |
220 | count = size; | |
221 | size -= count; | |
222 | ||
223 | page = grab_cache_page(mapping, index); | |
224 | if (!page) { | |
225 | err = -ENOMEM; | |
226 | goto out; | |
227 | } | |
228 | ||
229 | if (!PageUptodate(page)) | |
230 | make_page_uptodate(page); | |
231 | ||
232 | index++; | |
233 | ||
234 | if (rw == READ) { | |
235 | src = kmap_atomic(page, KM_USER0) + offset; | |
236 | dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; | |
237 | } else { | |
238 | src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; | |
239 | dst = kmap_atomic(page, KM_USER1) + offset; | |
240 | } | |
241 | offset = 0; | |
242 | vec_offset += count; | |
243 | ||
244 | memcpy(dst, src, count); | |
245 | ||
246 | kunmap_atomic(src, KM_USER0); | |
247 | kunmap_atomic(dst, KM_USER1); | |
248 | ||
249 | if (rw == READ) | |
250 | flush_dcache_page(vec->bv_page); | |
251 | else | |
252 | set_page_dirty(page); | |
253 | unlock_page(page); | |
254 | put_page(page); | |
255 | } while (size); | |
256 | ||
257 | out: | |
258 | return err; | |
259 | } | |
260 | ||
261 | /* | |
262 | * Basically, my strategy here is to set up a buffer-head which can't be | |
263 | * deleted, and make that my Ramdisk. If the request is outside of the | |
264 | * allocated size, we must get rid of it... | |
265 | * | |
266 | * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support | |
267 | * | |
268 | */ | |
269 | static int rd_make_request(request_queue_t *q, struct bio *bio) | |
270 | { | |
271 | struct block_device *bdev = bio->bi_bdev; | |
272 | struct address_space * mapping = bdev->bd_inode->i_mapping; | |
273 | sector_t sector = bio->bi_sector; | |
274 | unsigned long len = bio->bi_size >> 9; | |
275 | int rw = bio_data_dir(bio); | |
276 | struct bio_vec *bvec; | |
277 | int ret = 0, i; | |
278 | ||
279 | if (sector + len > get_capacity(bdev->bd_disk)) | |
280 | goto fail; | |
281 | ||
282 | if (rw==READA) | |
283 | rw=READ; | |
284 | ||
285 | bio_for_each_segment(bvec, bio, i) { | |
286 | ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); | |
287 | sector += bvec->bv_len >> 9; | |
288 | } | |
289 | if (ret) | |
290 | goto fail; | |
291 | ||
292 | bio_endio(bio, bio->bi_size, 0); | |
293 | return 0; | |
294 | fail: | |
295 | bio_io_error(bio, bio->bi_size); | |
296 | return 0; | |
297 | } | |
298 | ||
299 | static int rd_ioctl(struct inode *inode, struct file *file, | |
300 | unsigned int cmd, unsigned long arg) | |
301 | { | |
302 | int error; | |
303 | struct block_device *bdev = inode->i_bdev; | |
304 | ||
305 | if (cmd != BLKFLSBUF) | |
306 | return -ENOTTY; | |
307 | ||
308 | /* | |
309 | * special: we want to release the ramdisk memory, it's not like with | |
310 | * the other blockdevices where this ioctl only flushes away the buffer | |
311 | * cache | |
312 | */ | |
313 | error = -EBUSY; | |
c039e313 | 314 | mutex_lock(&bdev->bd_mutex); |
1da177e4 LT |
315 | if (bdev->bd_openers <= 2) { |
316 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | |
317 | error = 0; | |
318 | } | |
c039e313 | 319 | mutex_unlock(&bdev->bd_mutex); |
1da177e4 LT |
320 | return error; |
321 | } | |
322 | ||
323 | /* | |
324 | * This is the backing_dev_info for the blockdev inode itself. It doesn't need | |
325 | * writeback and it does not contribute to dirty memory accounting. | |
326 | */ | |
327 | static struct backing_dev_info rd_backing_dev_info = { | |
328 | .ra_pages = 0, /* No readahead */ | |
329 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, | |
330 | .unplug_io_fn = default_unplug_io_fn, | |
331 | }; | |
332 | ||
333 | /* | |
334 | * This is the backing_dev_info for the files which live atop the ramdisk | |
335 | * "device". These files do need writeback and they do contribute to dirty | |
336 | * memory accounting. | |
337 | */ | |
338 | static struct backing_dev_info rd_file_backing_dev_info = { | |
339 | .ra_pages = 0, /* No readahead */ | |
340 | .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ | |
341 | .unplug_io_fn = default_unplug_io_fn, | |
342 | }; | |
343 | ||
344 | static int rd_open(struct inode *inode, struct file *filp) | |
345 | { | |
346 | unsigned unit = iminor(inode); | |
347 | ||
348 | if (rd_bdev[unit] == NULL) { | |
349 | struct block_device *bdev = inode->i_bdev; | |
350 | struct address_space *mapping; | |
351 | unsigned bsize; | |
b4e3ca1a | 352 | gfp_t gfp_mask; |
1da177e4 LT |
353 | |
354 | inode = igrab(bdev->bd_inode); | |
355 | rd_bdev[unit] = bdev; | |
356 | bdev->bd_openers++; | |
357 | bsize = bdev_hardsect_size(bdev); | |
358 | bdev->bd_block_size = bsize; | |
359 | inode->i_blkbits = blksize_bits(bsize); | |
360 | inode->i_size = get_capacity(bdev->bd_disk)<<9; | |
361 | ||
362 | mapping = inode->i_mapping; | |
363 | mapping->a_ops = &ramdisk_aops; | |
364 | mapping->backing_dev_info = &rd_backing_dev_info; | |
365 | bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; | |
366 | ||
367 | /* | |
368 | * Deep badness. rd_blkdev_pagecache_IO() needs to allocate | |
369 | * pagecache pages within a request_fn. We cannot recur back | |
370 | * into the filesytem which is mounted atop the ramdisk, because | |
371 | * that would deadlock on fs locks. And we really don't want | |
372 | * to reenter rd_blkdev_pagecache_IO when we're already within | |
373 | * that function. | |
374 | * | |
375 | * So we turn off __GFP_FS and __GFP_IO. | |
376 | * | |
377 | * And to give this thing a hope of working, turn on __GFP_HIGH. | |
378 | * Hopefully, there's enough regular memory allocation going on | |
379 | * for the page allocator emergency pools to keep the ramdisk | |
380 | * driver happy. | |
381 | */ | |
382 | gfp_mask = mapping_gfp_mask(mapping); | |
383 | gfp_mask &= ~(__GFP_FS|__GFP_IO); | |
384 | gfp_mask |= __GFP_HIGH; | |
385 | mapping_set_gfp_mask(mapping, gfp_mask); | |
386 | } | |
387 | ||
388 | return 0; | |
389 | } | |
390 | ||
391 | static struct block_device_operations rd_bd_op = { | |
392 | .owner = THIS_MODULE, | |
393 | .open = rd_open, | |
394 | .ioctl = rd_ioctl, | |
395 | }; | |
396 | ||
397 | /* | |
398 | * Before freeing the module, invalidate all of the protected buffers! | |
399 | */ | |
400 | static void __exit rd_cleanup(void) | |
401 | { | |
402 | int i; | |
403 | ||
404 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
405 | struct block_device *bdev = rd_bdev[i]; | |
406 | rd_bdev[i] = NULL; | |
407 | if (bdev) { | |
408 | invalidate_bdev(bdev, 1); | |
409 | blkdev_put(bdev); | |
410 | } | |
411 | del_gendisk(rd_disks[i]); | |
412 | put_disk(rd_disks[i]); | |
413 | blk_cleanup_queue(rd_queue[i]); | |
414 | } | |
415 | devfs_remove("rd"); | |
416 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | |
417 | } | |
418 | ||
419 | /* | |
420 | * This is the registration and initialization section of the RAM disk driver | |
421 | */ | |
422 | static int __init rd_init(void) | |
423 | { | |
424 | int i; | |
425 | int err = -ENOMEM; | |
426 | ||
427 | if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || | |
428 | (rd_blocksize & (rd_blocksize-1))) { | |
429 | printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", | |
430 | rd_blocksize); | |
431 | rd_blocksize = BLOCK_SIZE; | |
432 | } | |
433 | ||
434 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
435 | rd_disks[i] = alloc_disk(1); | |
436 | if (!rd_disks[i]) | |
437 | goto out; | |
438 | } | |
439 | ||
440 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { | |
441 | err = -EIO; | |
442 | goto out; | |
443 | } | |
444 | ||
445 | devfs_mk_dir("rd"); | |
446 | ||
447 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
448 | struct gendisk *disk = rd_disks[i]; | |
449 | ||
450 | rd_queue[i] = blk_alloc_queue(GFP_KERNEL); | |
451 | if (!rd_queue[i]) | |
452 | goto out_queue; | |
453 | ||
454 | blk_queue_make_request(rd_queue[i], &rd_make_request); | |
455 | blk_queue_hardsect_size(rd_queue[i], rd_blocksize); | |
456 | ||
457 | /* rd_size is given in kB */ | |
458 | disk->major = RAMDISK_MAJOR; | |
459 | disk->first_minor = i; | |
460 | disk->fops = &rd_bd_op; | |
461 | disk->queue = rd_queue[i]; | |
462 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | |
463 | sprintf(disk->disk_name, "ram%d", i); | |
464 | sprintf(disk->devfs_name, "rd/%d", i); | |
465 | set_capacity(disk, rd_size * 2); | |
466 | add_disk(rd_disks[i]); | |
467 | } | |
468 | ||
469 | /* rd_size is given in kB */ | |
470 | printk("RAMDISK driver initialized: " | |
471 | "%d RAM disks of %dK size %d blocksize\n", | |
472 | CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); | |
473 | ||
474 | return 0; | |
475 | out_queue: | |
476 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | |
477 | out: | |
478 | while (i--) { | |
479 | put_disk(rd_disks[i]); | |
480 | blk_cleanup_queue(rd_queue[i]); | |
481 | } | |
482 | return err; | |
483 | } | |
484 | ||
/* Driver entry and exit points. */
module_init(rd_init);
module_exit(rd_cleanup);
487 | ||
488 | /* options - nonmodular */ | |
489 | #ifndef MODULE | |
490 | static int __init ramdisk_size(char *str) | |
491 | { | |
492 | rd_size = simple_strtol(str,NULL,0); | |
493 | return 1; | |
494 | } | |
495 | static int __init ramdisk_size2(char *str) /* kludge */ | |
496 | { | |
497 | return ramdisk_size(str); | |
498 | } | |
499 | static int __init ramdisk_blocksize(char *str) | |
500 | { | |
501 | rd_blocksize = simple_strtol(str,NULL,0); | |
502 | return 1; | |
503 | } | |
504 | __setup("ramdisk=", ramdisk_size); | |
505 | __setup("ramdisk_size=", ramdisk_size2); | |
506 | __setup("ramdisk_blocksize=", ramdisk_blocksize); | |
507 | #endif | |
508 | ||
509 | /* options - modular */ | |
510 | module_param(rd_size, int, 0); | |
511 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | |
512 | module_param(rd_blocksize, int, 0); | |
513 | MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); | |
514 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
515 | ||
516 | MODULE_LICENSE("GPL"); |