Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. | |
3 | * | |
4 | * (C) Chad Page, Theodore Ts'o, et al., 1995. | |
5 | * | |
6 | * This RAM disk is designed to have filesystems created on it and mounted | |
7 | * just like a regular floppy disk. | |
8 | * | |
9 | * It also does something suggested by Linus: use the buffer cache as the | |
10 | * RAM disk data. This makes it possible to dynamically allocate the RAM disk | |
11 | * buffer - with some consequences I have to deal with as I write this. | |
12 | * | |
13 | * This code is based on the original ramdisk.c, written mostly by | |
14 | * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by | |
15 | * Chad Page to use the buffer cache to store the RAM disk data in | |
16 | * 1995; Theodore then took over the driver again, and cleaned it up | |
17 | * for inclusion in the mainline kernel. | |
18 | * | |
19 | * The original CRAMDISK code was written by Richard Lyons, and | |
20 | * adapted by Chad Page to use the new RAM disk interface. Theodore | |
21 | * Ts'o rewrote it so that both the compressed RAM disk loader and the | |
22 | * kernel decompressor use the same inflate.c codebase. The RAM disk | |
23 | * loader now also loads into a dynamic (buffer cache based) RAM disk, | |
24 | * not the old static RAM disk. Support for the old static RAM disk has | |
25 | * been completely removed. | |
26 | * | |
27 | * Loadable module support added by Tom Dyas. | |
28 | * | |
29 | * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): | |
30 | * Cosmetic changes in #ifdef MODULE, code movement, etc. | |
31 | * When the RAM disk module is removed, free the protected buffers | |
32 | * Default RAM disk size changed to 2.88 MB | |
33 | * | |
34 | * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 | |
35 | * | |
36 | * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) | |
37 | * - Chad Page | |
38 | * | |
39 | * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 | |
40 | * | |
41 | * Make block size and block size shift for RAM disks a global macro | |
42 | * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 | |
43 | */ | |
44 | ||
1da177e4 LT |
45 | #include <linux/string.h> |
46 | #include <linux/slab.h> | |
47 | #include <asm/atomic.h> | |
48 | #include <linux/bio.h> | |
49 | #include <linux/module.h> | |
50 | #include <linux/moduleparam.h> | |
51 | #include <linux/init.h> | |
1da177e4 LT |
52 | #include <linux/pagemap.h> |
53 | #include <linux/blkdev.h> | |
54 | #include <linux/genhd.h> | |
55 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | |
56 | #include <linux/backing-dev.h> | |
57 | #include <linux/blkpg.h> | |
58 | #include <linux/writeback.h> | |
59 | ||
60 | #include <asm/uaccess.h> | |
61 | ||
62 | /* Various static variables go here. Most are used only in the RAM disk code. | |
63 | */ | |
64 | ||
65 | static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; | |
66 | static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ | |
67 | static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; | |
68 | ||
69 | /* | |
70 | * Parameters for the boot-loading of the RAM disk. These are set by | |
71 | * init/main.c (from arguments to the kernel command line) or from the | |
72 | * architecture-specific setup routine (from the stored boot sector | |
73 | * information). | |
74 | */ | |
cccf2508 | 75 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ |
1da177e4 LT |
76 | /* |
77 | * It would be very desirable to have a soft-blocksize (that in the case | |
78 | * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because | |
79 | * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of | |
80 | * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages | |
81 | * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only | |
82 | * 1 page will be protected. Depending on the size of the ramdisk you | |
83 | * may want to change the ramdisk blocksize to achieve a better or worse MM | |
84 | * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that | |
85 | * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). | |
86 | */ | |
bef317e3 | 87 | static int rd_blocksize = CONFIG_BLK_DEV_RAM_BLOCKSIZE; |
1da177e4 LT |
88 | |
89 | /* | |
90 | * Copyright (C) 2000 Linus Torvalds. | |
91 | * 2000 Transmeta Corp. | |
92 | * aops copied from ramfs. | |
93 | */ | |
94 | ||
95 | /* | |
96 | * If a ramdisk page has buffers, some may be uptodate and some may be not. | |
97 | * To bring the page uptodate we zero out the non-uptodate buffers. The | |
98 | * page must be locked. | |
99 | */ | |
100 | static void make_page_uptodate(struct page *page) | |
101 | { | |
102 | if (page_has_buffers(page)) { | |
103 | struct buffer_head *bh = page_buffers(page); | |
104 | struct buffer_head *head = bh; | |
105 | ||
106 | do { | |
107 | if (!buffer_uptodate(bh)) { | |
108 | memset(bh->b_data, 0, bh->b_size); | |
109 | /* | |
110 | * akpm: I'm totally undecided about this. The | |
111 | * buffer has just been magically brought "up to | |
112 | * date", but nobody should want to be reading | |
113 | * it anyway, because it hasn't been used for | |
114 | * anything yet. It is still in a "not read | |
115 | * from disk yet" state. | |
116 | * | |
117 | * But non-uptodate buffers against an uptodate | |
118 | * page are against the rules. So do it anyway. | |
119 | */ | |
120 | set_buffer_uptodate(bh); | |
121 | } | |
122 | } while ((bh = bh->b_this_page) != head); | |
123 | } else { | |
124 | memset(page_address(page), 0, PAGE_CACHE_SIZE); | |
125 | } | |
126 | flush_dcache_page(page); | |
127 | SetPageUptodate(page); | |
128 | } | |
129 | ||
/*
 * ->readpage for the ramdisk mapping.  Nothing is read from anywhere: a
 * page that is not yet uptodate is brought uptodate by zero-filling (see
 * make_page_uptodate()).  The page is unlocked before returning.
 * Always returns 0.
 */
static int ramdisk_readpage(struct file *file, struct page *page)
{
	if (!PageUptodate(page)) {
		make_page_uptodate(page);
	}

	unlock_page(page);

	return 0;
}
137 | ||
/*
 * ->prepare_write for the ramdisk mapping.  Makes sure the page is
 * uptodate before the caller writes into it; the byte range given by
 * @offset and @to is not used.  Always returns 0.
 */
static int ramdisk_prepare_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	if (!PageUptodate(page)) {
		make_page_uptodate(page);
	}

	return 0;
}
145 | ||
/*
 * ->commit_write for the ramdisk mapping.  Marks the page dirty; the
 * byte range given by @offset and @to is not used.  Always returns 0.
 */
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	set_page_dirty(page);

	return 0;
}
152 | ||
153 | /* | |
59c51591 | 154 | * ->writepage to the blockdev's mapping has to redirty the page so that the |
994fc28c | 155 | * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM |
1da177e4 LT |
156 | * won't try to (pointlessly) write the page again for a while. |
157 | * | |
158 | * Really, these pages should not be on the LRU at all. | |
159 | */ | |
160 | static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) | |
161 | { | |
162 | if (!PageUptodate(page)) | |
163 | make_page_uptodate(page); | |
164 | SetPageDirty(page); | |
165 | if (wbc->for_reclaim) | |
994fc28c | 166 | return AOP_WRITEPAGE_ACTIVATE; |
1da177e4 LT |
167 | unlock_page(page); |
168 | return 0; | |
169 | } | |
170 | ||
/*
 * ->writepages for the ramdisk mapping.
 *
 * A small speedup: the ramdisk blockdev inode has no backing store to be
 * written back to, so attempts to do so are short-circuited here.
 * Neither argument is used.  Always returns 0.
 */
static int ramdisk_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	return 0;
}
180 | ||
/*
 * ramdisk blockdev pages have their own ->set_page_dirty() because we
 * don't want them to contribute to dirty memory accounting.
 *
 * Returns 1 if this call set the dirty bit, 0 if the page was already
 * dirty.
 */
static int ramdisk_set_page_dirty(struct page *page)
{
	return !TestSetPageDirty(page);
}
191 | ||
5d0360ee CB |
192 | /* |
193 | * releasepage is called by pagevec_strip/try_to_release_page if | |
194 | * buffers_heads_over_limit is true. Without a releasepage function | |
195 | * try_to_free_buffers is called instead. That can unset the dirty | |
196 | * bit of our ram disk pages, which will be eventually freed, even | |
197 | * if the page is still in use. | |
198 | */ | |
199 | static int ramdisk_releasepage(struct page *page, gfp_t dummy) | |
200 | { | |
201 | return 0; | |
202 | } | |
203 | ||
f5e54d6e | 204 | static const struct address_space_operations ramdisk_aops = { |
1da177e4 LT |
205 | .readpage = ramdisk_readpage, |
206 | .prepare_write = ramdisk_prepare_write, | |
207 | .commit_write = ramdisk_commit_write, | |
208 | .writepage = ramdisk_writepage, | |
209 | .set_page_dirty = ramdisk_set_page_dirty, | |
210 | .writepages = ramdisk_writepages, | |
5d0360ee | 211 | .releasepage = ramdisk_releasepage, |
1da177e4 LT |
212 | }; |
213 | ||
214 | static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, | |
215 | struct address_space *mapping) | |
216 | { | |
217 | pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); | |
218 | unsigned int vec_offset = vec->bv_offset; | |
219 | int offset = (sector << 9) & ~PAGE_CACHE_MASK; | |
220 | int size = vec->bv_len; | |
221 | int err = 0; | |
222 | ||
223 | do { | |
224 | int count; | |
225 | struct page *page; | |
226 | char *src; | |
227 | char *dst; | |
228 | ||
229 | count = PAGE_CACHE_SIZE - offset; | |
230 | if (count > size) | |
231 | count = size; | |
232 | size -= count; | |
233 | ||
234 | page = grab_cache_page(mapping, index); | |
235 | if (!page) { | |
236 | err = -ENOMEM; | |
237 | goto out; | |
238 | } | |
239 | ||
240 | if (!PageUptodate(page)) | |
241 | make_page_uptodate(page); | |
242 | ||
243 | index++; | |
244 | ||
245 | if (rw == READ) { | |
246 | src = kmap_atomic(page, KM_USER0) + offset; | |
247 | dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; | |
248 | } else { | |
249 | src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; | |
250 | dst = kmap_atomic(page, KM_USER1) + offset; | |
251 | } | |
252 | offset = 0; | |
253 | vec_offset += count; | |
254 | ||
255 | memcpy(dst, src, count); | |
256 | ||
257 | kunmap_atomic(src, KM_USER0); | |
258 | kunmap_atomic(dst, KM_USER1); | |
259 | ||
260 | if (rw == READ) | |
261 | flush_dcache_page(vec->bv_page); | |
262 | else | |
263 | set_page_dirty(page); | |
264 | unlock_page(page); | |
265 | put_page(page); | |
266 | } while (size); | |
267 | ||
268 | out: | |
269 | return err; | |
270 | } | |
271 | ||
272 | /* | |
273 | * Basically, my strategy here is to set up a buffer-head which can't be | |
274 | * deleted, and make that my Ramdisk. If the request is outside of the | |
275 | * allocated size, we must get rid of it... | |
276 | * | |
277 | * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support | |
278 | * | |
279 | */ | |
165125e1 | 280 | static int rd_make_request(struct request_queue *q, struct bio *bio) |
1da177e4 LT |
281 | { |
282 | struct block_device *bdev = bio->bi_bdev; | |
283 | struct address_space * mapping = bdev->bd_inode->i_mapping; | |
284 | sector_t sector = bio->bi_sector; | |
285 | unsigned long len = bio->bi_size >> 9; | |
286 | int rw = bio_data_dir(bio); | |
287 | struct bio_vec *bvec; | |
288 | int ret = 0, i; | |
289 | ||
290 | if (sector + len > get_capacity(bdev->bd_disk)) | |
291 | goto fail; | |
292 | ||
293 | if (rw==READA) | |
294 | rw=READ; | |
295 | ||
296 | bio_for_each_segment(bvec, bio, i) { | |
297 | ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); | |
298 | sector += bvec->bv_len >> 9; | |
299 | } | |
300 | if (ret) | |
301 | goto fail; | |
302 | ||
6712ecf8 | 303 | bio_endio(bio, 0); |
1da177e4 LT |
304 | return 0; |
305 | fail: | |
6712ecf8 | 306 | bio_io_error(bio); |
1da177e4 LT |
307 | return 0; |
308 | } | |
309 | ||
310 | static int rd_ioctl(struct inode *inode, struct file *file, | |
311 | unsigned int cmd, unsigned long arg) | |
312 | { | |
313 | int error; | |
314 | struct block_device *bdev = inode->i_bdev; | |
315 | ||
316 | if (cmd != BLKFLSBUF) | |
317 | return -ENOTTY; | |
318 | ||
319 | /* | |
320 | * special: we want to release the ramdisk memory, it's not like with | |
321 | * the other blockdevices where this ioctl only flushes away the buffer | |
322 | * cache | |
323 | */ | |
324 | error = -EBUSY; | |
c039e313 | 325 | mutex_lock(&bdev->bd_mutex); |
1da177e4 LT |
326 | if (bdev->bd_openers <= 2) { |
327 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | |
328 | error = 0; | |
329 | } | |
c039e313 | 330 | mutex_unlock(&bdev->bd_mutex); |
1da177e4 LT |
331 | return error; |
332 | } | |
333 | ||
334 | /* | |
335 | * This is the backing_dev_info for the blockdev inode itself. It doesn't need | |
336 | * writeback and it does not contribute to dirty memory accounting. | |
337 | */ | |
338 | static struct backing_dev_info rd_backing_dev_info = { | |
339 | .ra_pages = 0, /* No readahead */ | |
340 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, | |
341 | .unplug_io_fn = default_unplug_io_fn, | |
342 | }; | |
343 | ||
344 | /* | |
345 | * This is the backing_dev_info for the files which live atop the ramdisk | |
346 | * "device". These files do need writeback and they do contribute to dirty | |
347 | * memory accounting. | |
348 | */ | |
349 | static struct backing_dev_info rd_file_backing_dev_info = { | |
350 | .ra_pages = 0, /* No readahead */ | |
351 | .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ | |
352 | .unplug_io_fn = default_unplug_io_fn, | |
353 | }; | |
354 | ||
355 | static int rd_open(struct inode *inode, struct file *filp) | |
356 | { | |
357 | unsigned unit = iminor(inode); | |
358 | ||
359 | if (rd_bdev[unit] == NULL) { | |
360 | struct block_device *bdev = inode->i_bdev; | |
361 | struct address_space *mapping; | |
362 | unsigned bsize; | |
b4e3ca1a | 363 | gfp_t gfp_mask; |
1da177e4 LT |
364 | |
365 | inode = igrab(bdev->bd_inode); | |
366 | rd_bdev[unit] = bdev; | |
367 | bdev->bd_openers++; | |
368 | bsize = bdev_hardsect_size(bdev); | |
369 | bdev->bd_block_size = bsize; | |
370 | inode->i_blkbits = blksize_bits(bsize); | |
371 | inode->i_size = get_capacity(bdev->bd_disk)<<9; | |
372 | ||
373 | mapping = inode->i_mapping; | |
374 | mapping->a_ops = &ramdisk_aops; | |
375 | mapping->backing_dev_info = &rd_backing_dev_info; | |
376 | bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; | |
377 | ||
378 | /* | |
379 | * Deep badness. rd_blkdev_pagecache_IO() needs to allocate | |
380 | * pagecache pages within a request_fn. We cannot recur back | |
3a4fa0a2 | 381 | * into the filesystem which is mounted atop the ramdisk, because |
1da177e4 LT |
382 | * that would deadlock on fs locks. And we really don't want |
383 | * to reenter rd_blkdev_pagecache_IO when we're already within | |
384 | * that function. | |
385 | * | |
386 | * So we turn off __GFP_FS and __GFP_IO. | |
387 | * | |
388 | * And to give this thing a hope of working, turn on __GFP_HIGH. | |
389 | * Hopefully, there's enough regular memory allocation going on | |
390 | * for the page allocator emergency pools to keep the ramdisk | |
391 | * driver happy. | |
392 | */ | |
393 | gfp_mask = mapping_gfp_mask(mapping); | |
394 | gfp_mask &= ~(__GFP_FS|__GFP_IO); | |
395 | gfp_mask |= __GFP_HIGH; | |
396 | mapping_set_gfp_mask(mapping, gfp_mask); | |
397 | } | |
398 | ||
399 | return 0; | |
400 | } | |
401 | ||
402 | static struct block_device_operations rd_bd_op = { | |
403 | .owner = THIS_MODULE, | |
404 | .open = rd_open, | |
405 | .ioctl = rd_ioctl, | |
406 | }; | |
407 | ||
408 | /* | |
409 | * Before freeing the module, invalidate all of the protected buffers! | |
410 | */ | |
411 | static void __exit rd_cleanup(void) | |
412 | { | |
413 | int i; | |
414 | ||
415 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
416 | struct block_device *bdev = rd_bdev[i]; | |
417 | rd_bdev[i] = NULL; | |
418 | if (bdev) { | |
f98393a6 | 419 | invalidate_bdev(bdev); |
1da177e4 LT |
420 | blkdev_put(bdev); |
421 | } | |
422 | del_gendisk(rd_disks[i]); | |
423 | put_disk(rd_disks[i]); | |
424 | blk_cleanup_queue(rd_queue[i]); | |
425 | } | |
1da177e4 | 426 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
e0bf68dd PZ |
427 | |
428 | bdi_destroy(&rd_file_backing_dev_info); | |
429 | bdi_destroy(&rd_backing_dev_info); | |
1da177e4 LT |
430 | } |
431 | ||
432 | /* | |
433 | * This is the registration and initialization section of the RAM disk driver | |
434 | */ | |
435 | static int __init rd_init(void) | |
436 | { | |
437 | int i; | |
e0bf68dd PZ |
438 | int err; |
439 | ||
440 | err = bdi_init(&rd_backing_dev_info); | |
441 | if (err) | |
442 | goto out2; | |
443 | ||
444 | err = bdi_init(&rd_file_backing_dev_info); | |
445 | if (err) { | |
446 | bdi_destroy(&rd_backing_dev_info); | |
447 | goto out2; | |
448 | } | |
449 | ||
450 | err = -ENOMEM; | |
1da177e4 LT |
451 | |
452 | if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || | |
453 | (rd_blocksize & (rd_blocksize-1))) { | |
454 | printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", | |
455 | rd_blocksize); | |
456 | rd_blocksize = BLOCK_SIZE; | |
457 | } | |
458 | ||
459 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
460 | rd_disks[i] = alloc_disk(1); | |
461 | if (!rd_disks[i]) | |
462 | goto out; | |
ea6f94df AM |
463 | |
464 | rd_queue[i] = blk_alloc_queue(GFP_KERNEL); | |
465 | if (!rd_queue[i]) { | |
466 | put_disk(rd_disks[i]); | |
467 | goto out; | |
468 | } | |
1da177e4 LT |
469 | } |
470 | ||
471 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { | |
472 | err = -EIO; | |
473 | goto out; | |
474 | } | |
475 | ||
1da177e4 LT |
476 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { |
477 | struct gendisk *disk = rd_disks[i]; | |
478 | ||
1da177e4 LT |
479 | blk_queue_make_request(rd_queue[i], &rd_make_request); |
480 | blk_queue_hardsect_size(rd_queue[i], rd_blocksize); | |
481 | ||
482 | /* rd_size is given in kB */ | |
483 | disk->major = RAMDISK_MAJOR; | |
484 | disk->first_minor = i; | |
485 | disk->fops = &rd_bd_op; | |
486 | disk->queue = rd_queue[i]; | |
487 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | |
488 | sprintf(disk->disk_name, "ram%d", i); | |
1da177e4 LT |
489 | set_capacity(disk, rd_size * 2); |
490 | add_disk(rd_disks[i]); | |
491 | } | |
492 | ||
493 | /* rd_size is given in kB */ | |
494 | printk("RAMDISK driver initialized: " | |
495 | "%d RAM disks of %dK size %d blocksize\n", | |
496 | CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); | |
497 | ||
498 | return 0; | |
1da177e4 LT |
499 | out: |
500 | while (i--) { | |
501 | put_disk(rd_disks[i]); | |
502 | blk_cleanup_queue(rd_queue[i]); | |
503 | } | |
e0bf68dd PZ |
504 | bdi_destroy(&rd_backing_dev_info); |
505 | bdi_destroy(&rd_file_backing_dev_info); | |
506 | out2: | |
1da177e4 LT |
507 | return err; |
508 | } | |
509 | ||
510 | module_init(rd_init); | |
511 | module_exit(rd_cleanup); | |
512 | ||
513 | /* options - nonmodular */ | |
514 | #ifndef MODULE | |
515 | static int __init ramdisk_size(char *str) | |
516 | { | |
517 | rd_size = simple_strtol(str,NULL,0); | |
518 | return 1; | |
519 | } | |
1da177e4 LT |
520 | static int __init ramdisk_blocksize(char *str) |
521 | { | |
522 | rd_blocksize = simple_strtol(str,NULL,0); | |
523 | return 1; | |
524 | } | |
fac8b209 | 525 | __setup("ramdisk_size=", ramdisk_size); |
1da177e4 LT |
526 | __setup("ramdisk_blocksize=", ramdisk_blocksize); |
527 | #endif | |
528 | ||
529 | /* options - modular */ | |
530 | module_param(rd_size, int, 0); | |
531 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | |
532 | module_param(rd_blocksize, int, 0); | |
533 | MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); | |
534 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
535 | ||
536 | MODULE_LICENSE("GPL"); |