Commit | Line | Data |
---|---|---|
306b0c95 NG |
1 | /* |
2 | * Compressed RAM based swap device | |
3 | * | |
4 | * Copyright (C) 2008, 2009 Nitin Gupta | |
5 | * | |
6 | * This code is released using a dual license strategy: BSD/GPL | |
7 | * You can choose the licence that better fits your requirements. | |
8 | * | |
9 | * Released under the terms of 3-clause BSD License | |
10 | * Released under the terms of GNU General Public License Version 2.0 | |
11 | * | |
12 | * Project home: http://compcache.googlecode.com | |
13 | */ | |
14 | ||
15 | #define KMSG_COMPONENT "ramzswap" | |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
20 | #include <linux/bitops.h> | |
21 | #include <linux/blkdev.h> | |
22 | #include <linux/buffer_head.h> | |
23 | #include <linux/device.h> | |
24 | #include <linux/genhd.h> | |
25 | #include <linux/highmem.h> | |
26 | #include <linux/lzo.h> | |
27 | #include <linux/mutex.h> | |
28 | #include <linux/string.h> | |
29 | #include <linux/swap.h> | |
30 | #include <linux/swapops.h> | |
31 | #include <linux/vmalloc.h> | |
32 | #include <linux/version.h> | |
33 | ||
34 | #include "ramzswap_drv.h" | |
35 | ||
/* Globals */
/* Block-device major number for all ramzswap devices (assigned at init —
 * registration code is not visible in this chunk). */
static int ramzswap_major;
/* Array of per-device state; presumably allocated at module init and sized
 * by num_devices — TODO confirm against module init code. */
static struct ramzswap *devices;

/*
 * Pages that compress to larger than this size are
 * forwarded to backing swap, if present or stored
 * uncompressed in memory otherwise.
 */
static unsigned int max_zpage_size;

/* Module params (documentation at end) */
/* Number of ramzswap devices to create. */
static unsigned int num_devices;
49 | ||
50 | static int rzs_test_flag(struct ramzswap *rzs, u32 index, | |
51 | enum rzs_pageflags flag) | |
52 | { | |
53 | return rzs->table[index].flags & BIT(flag); | |
54 | } | |
55 | ||
56 | static void rzs_set_flag(struct ramzswap *rzs, u32 index, | |
57 | enum rzs_pageflags flag) | |
58 | { | |
59 | rzs->table[index].flags |= BIT(flag); | |
60 | } | |
61 | ||
62 | static void rzs_clear_flag(struct ramzswap *rzs, u32 index, | |
63 | enum rzs_pageflags flag) | |
64 | { | |
65 | rzs->table[index].flags &= ~BIT(flag); | |
66 | } | |
67 | ||
68 | static int page_zero_filled(void *ptr) | |
69 | { | |
70 | unsigned int pos; | |
71 | unsigned long *page; | |
72 | ||
73 | page = (unsigned long *)ptr; | |
74 | ||
75 | for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { | |
76 | if (page[pos]) | |
77 | return 0; | |
78 | } | |
79 | ||
80 | return 1; | |
81 | } | |
82 | ||
/*
 * memlimit cannot be greater than backing disk size.
 *
 * Validate rzs->memlimit and replace it with a sane default when it is
 * unset or exceeds the backing disk size. The limit bounds the amount of
 * *compressed* data kept in RAM; overflow is forwarded to backing swap.
 * Must be called after rzs->disksize is known.
 */
static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
{
	int memlimit_valid = 1;

	/* Zero means no limit was supplied by the user. */
	if (!rzs->memlimit) {
		pr_info("Memory limit not set.\n");
		memlimit_valid = 0;
	}

	/* Compressed data eventually spills to disk, so a limit larger
	 * than the disk itself is meaningless. */
	if (rzs->memlimit > rzs->disksize) {
		pr_info("Memory limit cannot be greater than "
			"disksize: limit=%zu, disksize=%zu\n",
			rzs->memlimit, rzs->disksize);
		memlimit_valid = 0;
	}

	/* Default: min(default_memlimit_perc_ram% of RAM, disksize). */
	if (!memlimit_valid) {
		size_t mempart, disksize;
		pr_info("Using default: smaller of (%u%% of RAM) and "
			"(backing disk size).\n",
			default_memlimit_perc_ram);
		mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
		disksize = rzs->disksize;
		rzs->memlimit = mempart > disksize ? disksize : mempart;
	}

	/* Warn (but allow) limits above half of RAM: the limit counts
	 * compressed bytes, which expand ~2x when swapped back in. */
	if (rzs->memlimit > totalram_bytes / 2) {
		pr_info(
		"Its not advisable setting limit more than half of "
		"size of memory since we expect a 2:1 compression ratio. "
		"Limit represents amount of *compressed* data we can keep "
		"in memory!\n"
		"\tMemory Size: %zu kB\n"
		"\tLimit you selected: %zu kB\n"
		"Continuing anyway ...\n",
		totalram_bytes >> 10, rzs->memlimit >> 10
		);
	}

	/* Round down to a whole number of pages; a zero result here means
	 * both limit and disksize were unusable — treat as fatal. */
	rzs->memlimit &= PAGE_MASK;
	BUG_ON(!rzs->memlimit);
}
128 | ||
129 | static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes) | |
130 | { | |
131 | if (!rzs->disksize) { | |
132 | pr_info( | |
133 | "disk size not provided. You can use disksize_kb module " | |
134 | "param to specify size.\nUsing default: (%u%% of RAM).\n", | |
135 | default_disksize_perc_ram | |
136 | ); | |
137 | rzs->disksize = default_disksize_perc_ram * | |
138 | (totalram_bytes / 100); | |
139 | } | |
140 | ||
141 | if (rzs->disksize > 2 * (totalram_bytes)) { | |
142 | pr_info( | |
143 | "There is little point creating a ramzswap of greater than " | |
144 | "twice the size of memory since we expect a 2:1 compression " | |
145 | "ratio. Note that ramzswap uses about 0.1%% of the size of " | |
146 | "the swap device when not in use so a huge ramzswap is " | |
147 | "wasteful.\n" | |
148 | "\tMemory Size: %zu kB\n" | |
149 | "\tSize you selected: %zu kB\n" | |
150 | "Continuing anyway ...\n", | |
151 | totalram_bytes >> 10, rzs->disksize | |
152 | ); | |
153 | } | |
154 | ||
155 | rzs->disksize &= PAGE_MASK; | |
156 | } | |
157 | ||
158 | /* | |
159 | * Swap header (1st page of swap device) contains information | |
160 | * to indentify it as a swap partition. Prepare such a header | |
161 | * for ramzswap device (ramzswap0) so that swapon can identify | |
162 | * it as swap partition. In case backing swap device is provided, | |
163 | * copy its swap header. | |
164 | */ | |
/*
 * Swap header (1st page of swap device) contains information
 * to indentify it as a swap partition. Prepare such a header
 * for ramzswap device (ramzswap0) so that swapon can identify
 * it as swap partition. In case backing swap device is provided,
 * copy its swap header.
 *
 * Returns 0 on success, -EINVAL when the backing swap header cannot
 * be read or contains bad pages.
 */
static int setup_swap_header(struct ramzswap *rzs, union swap_header *s)
{
	int ret = 0;
	struct page *page;
	struct address_space *mapping;
	union swap_header *backing_swap_header;

	/*
	 * There is no backing swap device. Create a swap header
	 * that is acceptable by swapon.
	 */
	if (!rzs->backing_swap) {
		s->info.version = 1;
		/* 'last_page' is the 0-based index of the last usable page. */
		s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
		s->info.nr_badpages = 0;
		/* Magic string swapon looks for at the end of the page. */
		memcpy(s->magic.magic, "SWAPSPACE2", 10);
		return 0;
	}

	/*
	 * We have a backing swap device. Copy its swap header
	 * to ramzswap device header. If this header contains
	 * invalid information (backing device not a swap
	 * partition, etc.), swapon will fail for ramzswap
	 * which is correct behavior - we don't want to swap
	 * over filesystem partition!
	 */

	/* Read the backing swap header (code from sys_swapon) */
	mapping = rzs->swap_file->f_mapping;
	if (!mapping->a_ops->readpage) {
		ret = -EINVAL;
		goto out;
	}

	page = read_mapping_page(mapping, 0, rzs->swap_file);
	if (IS_ERR(page)) {
		ret = PTR_ERR(page);
		goto out;
	}

	/* NOTE(review): read_mapping_page() returns a referenced page but
	 * no page_cache_release() is visible here — confirm the reference
	 * is dropped elsewhere or this leaks one page per setup. */
	backing_swap_header = kmap(page);
	memcpy(s, backing_swap_header, sizeof(*s));
	if (s->info.nr_badpages) {
		pr_info("Cannot use backing swap with bad pages (%u)\n",
			s->info.nr_badpages);
		ret = -EINVAL;
	}
	/*
	 * ramzswap disksize equals number of usable pages in backing
	 * swap. Set last_page in swap header to match this disksize
	 * ('last_page' means 0-based index of last usable swap page).
	 */
	s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
	kunmap(page);

out:
	return ret;
}
224 | ||
/*
 * Flush the data cache for 'page', working around ARM's swap-cache
 * deferral (see inline comment). A no-op wrapper around
 * flush_dcache_page() on other architectures.
 */
static void ramzswap_flush_dcache_page(struct page *page)
{
#ifdef CONFIG_ARM
	int flag = 0;
	/*
	 * Ugly hack to get flush_dcache_page() work on ARM.
	 * page_mapping(page) == NULL after clearing this swap cache flag.
	 * Without clearing this flag, flush_dcache_page() will simply set
	 * "PG_dcache_dirty" bit and return.
	 */
	if (PageSwapCache(page)) {
		flag = 1;
		ClearPageSwapCache(page);
	}
#endif
	flush_dcache_page(page);
#ifdef CONFIG_ARM
	/* Restore the swap-cache flag we temporarily cleared above. */
	if (flag)
		SetPageSwapCache(page);
#endif
}
246 | ||
/*
 * Fill the user-visible stats structure 's' from device state. Detailed
 * counters are only populated when CONFIG_RAMZSWAP_STATS is enabled;
 * otherwise only name, disksize and memlimit are reported.
 */
void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
			struct ramzswap_ioctl_stats *s)
{
	/* strncpy does not guarantee termination; force a trailing NUL. */
	strncpy(s->backing_swap_name, rzs->backing_swap_name,
		MAX_SWAP_NAME_LEN - 1);
	s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';

	s->disksize = rzs->disksize;
	s->memlimit = rzs->memlimit;

#if defined(CONFIG_RAMZSWAP_STATS)
	{
		struct ramzswap_stats *rs = &rzs->stats;
		size_t succ_writes, mem_used;
		unsigned int good_compress_perc = 0, no_compress_perc = 0;

		/* Total memory = pool usage + pages stored uncompressed. */
		mem_used = xv_get_total_size_bytes(rzs->mem_pool)
				+ (rs->pages_expand << PAGE_SHIFT);
		succ_writes = rs->num_writes - rs->failed_writes;

		/* Guard against division by zero on a fresh device. */
		if (succ_writes && rs->pages_stored) {
			good_compress_perc = rs->good_compress * 100
						/ rs->pages_stored;
			no_compress_perc = rs->pages_expand * 100
						/ rs->pages_stored;
		}

		s->num_reads = rs->num_reads;
		s->num_writes = rs->num_writes;
		s->failed_reads = rs->failed_reads;
		s->failed_writes = rs->failed_writes;
		s->invalid_io = rs->invalid_io;
		s->pages_zero = rs->pages_zero;

		s->good_compress_pct = good_compress_perc;
		s->pages_expand_pct = no_compress_perc;

		s->pages_stored = rs->pages_stored;
		s->pages_used = mem_used >> PAGE_SHIFT;
		s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
		s->compr_data_size = rs->compr_size;
		s->mem_used_total = mem_used;

		s->bdev_num_reads = rs->bdev_num_reads;
		s->bdev_num_writes = rs->bdev_num_writes;
	}
#endif /* CONFIG_RAMZSWAP_STATS */
}
295 | ||
296 | static int add_backing_swap_extent(struct ramzswap *rzs, | |
297 | pgoff_t phy_pagenum, | |
298 | pgoff_t num_pages) | |
299 | { | |
300 | unsigned int idx; | |
301 | struct list_head *head; | |
302 | struct page *curr_page, *new_page; | |
303 | unsigned int extents_per_page = PAGE_SIZE / | |
304 | sizeof(struct ramzswap_backing_extent); | |
305 | ||
306 | idx = rzs->num_extents % extents_per_page; | |
307 | if (!idx) { | |
308 | new_page = alloc_page(__GFP_ZERO); | |
309 | if (!new_page) | |
310 | return -ENOMEM; | |
311 | ||
312 | if (rzs->num_extents) { | |
313 | curr_page = virt_to_page(rzs->curr_extent); | |
314 | head = &curr_page->lru; | |
315 | } else { | |
316 | head = &rzs->backing_swap_extent_list; | |
317 | } | |
318 | ||
319 | list_add(&new_page->lru, head); | |
320 | rzs->curr_extent = page_address(new_page); | |
321 | } | |
322 | ||
323 | rzs->curr_extent->phy_pagenum = phy_pagenum; | |
324 | rzs->curr_extent->num_pages = num_pages; | |
325 | ||
326 | pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, " | |
327 | "pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages, | |
328 | phy_pagenum + num_pages - 1, rzs->curr_extent); | |
329 | ||
330 | if (idx != extents_per_page - 1) | |
331 | rzs->curr_extent++; | |
332 | ||
333 | return 0; | |
334 | } | |
335 | ||
/*
 * Walk the backing swap file with bmap() and record every PAGE_SIZE
 * aligned, physically contiguous run of blocks as an extent (same probe
 * technique as sys_swapon's setup_swap_extents). On success *num_pages
 * is the total number of usable pages found.
 *
 * Returns 0 on success; -EINVAL if the file has holes or no usable
 * extents; -ENOMEM from extent allocation. On error, any extent pages
 * already allocated are freed.
 */
static int setup_backing_swap_extents(struct ramzswap *rzs,
		struct inode *inode, unsigned long *num_pages)
{
	int ret = 0;
	unsigned blkbits;
	unsigned blocks_per_page;
	pgoff_t contig_pages = 0, total_pages = 0;
	pgoff_t pagenum = 0, prev_pagenum = 0;
	sector_t probe_block = 0;
	sector_t last_block;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	last_block = i_size_read(inode) >> blkbits;
	while (probe_block + blocks_per_page <= last_block) {
		unsigned block_in_page;
		sector_t first_block;

		/* bmap() == 0 means a hole: unusable as swap backing. */
		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/* It must be PAGE_SIZE aligned on-disk */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto probe_next;
		}

		/* All blocks within this page must be contiguous on disk */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto probe_next;
			}
		}

		/*
		 * We found a PAGE_SIZE length, PAGE_SIZE aligned
		 * run of blocks.
		 */
		pagenum = first_block >> (PAGE_SHIFT - blkbits);

		/* A gap from the previous page ends the current extent;
		 * flush it before starting a new one. */
		if (total_pages && (pagenum != prev_pagenum + 1)) {
			ret = add_backing_swap_extent(rzs, prev_pagenum -
					(contig_pages - 1), contig_pages);
			if (ret < 0)
				goto out;
			rzs->num_extents++;
			contig_pages = 0;
		}
		total_pages++;
		contig_pages++;
		prev_pagenum = pagenum;
		probe_block += blocks_per_page;

probe_next:
		continue;
	}

	/* Flush the final (still-open) extent, if any. */
	if (contig_pages) {
		pr_debug("adding last extent: pagenum=%lu, "
			"contig_pages=%lu\n", pagenum, contig_pages);
		ret = add_backing_swap_extent(rzs,
			prev_pagenum - (contig_pages - 1), contig_pages);
		if (ret < 0)
			goto out;
		rzs->num_extents++;
	}
	if (!rzs->num_extents) {
		pr_err("No swap extents found!\n");
		ret = -EINVAL;
	}

	if (!ret) {
		*num_pages = total_pages;
		pr_info("Found %lu extents containing %luk\n",
			rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
	}
	goto out;

bad_bmap:
	pr_err("Backing swapfile has holes\n");
	ret = -EINVAL;
out:
	/* On error, free every extent page queued so far. */
	while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
		struct page *page;
		struct list_head *entry = rzs->backing_swap_extent_list.next;
		page = list_entry(entry, struct page, lru);
		list_del(entry);
		__free_page(page);
	}
	return ret;
}
437 | ||
/*
 * Pre-compute, for every page of the ramzswap table, which backing swap
 * extent its first entry falls in. The result is cached in the table
 * page's struct page itself:
 *   page->mapping  - pointer to the covering ramzswap_backing_extent
 *   page->private  - pages already consumed within that extent when the
 *                    table page starts
 * map_backing_swap_page() later uses this cache to translate logical
 * page numbers to physical ones without rescanning all extents.
 */
static void map_backing_swap_extents(struct ramzswap *rzs)
{
	struct ramzswap_backing_extent *se;
	struct page *table_page, *se_page;
	unsigned long num_pages, num_table_pages, entry;
	unsigned long se_idx, span;
	unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
	unsigned extents_per_page = PAGE_SIZE / sizeof(*se);

	/* True for block device */
	if (!rzs->num_extents)
		return;

	se_page = list_entry(rzs->backing_swap_extent_list.next,
					struct page, lru);
	se = page_address(se_page);
	/* 'span' = total pages covered by extents seen so far. */
	span = se->num_pages;
	num_pages = rzs->disksize >> PAGE_SHIFT;
	num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
					PAGE_SIZE);

	entry = 0;
	se_idx = 0;
	while (num_table_pages--) {
		/* rzs->table is vmalloc'ed: resolve the backing page. */
		table_page = vmalloc_to_page(&rzs->table[entry]);
		/* Advance through extents until one covers 'entry'. */
		while (span <= entry) {
			se_idx++;
			/* Running out of extents here means disksize and
			 * the extent list disagree - a setup bug. */
			if (se_idx == rzs->num_extents)
				BUG();

			/* Extents are packed extents_per_page to a page;
			 * hop to the next list page at the boundary. */
			if (!(se_idx % extents_per_page)) {
				se_page = list_entry(se_page->lru.next,
						struct page, lru);
				se = page_address(se_page);
			} else
				se++;

			span += se->num_pages;
		}
		table_page->mapping = (struct address_space *)se;
		table_page->private = se->num_pages - (span - entry);
		pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
			entry, span, table_page->mapping, table_page->private);
		entry += entries_per_page;
	}
}
484 | ||
485 | /* | |
486 | * Check if value of backing_swap module param is sane. | |
487 | * Claim this device and set ramzswap size equal to | |
488 | * size of this block device. | |
489 | */ | |
490 | static int setup_backing_swap(struct ramzswap *rzs) | |
491 | { | |
492 | int ret = 0; | |
493 | size_t disksize; | |
494 | unsigned long num_pages = 0; | |
495 | struct inode *inode; | |
496 | struct file *swap_file; | |
497 | struct address_space *mapping; | |
498 | struct block_device *bdev = NULL; | |
499 | ||
500 | if (!rzs->backing_swap_name[0]) { | |
501 | pr_debug("backing_swap param not given\n"); | |
502 | goto out; | |
503 | } | |
504 | ||
505 | pr_info("Using backing swap device: %s\n", rzs->backing_swap_name); | |
506 | ||
507 | swap_file = filp_open(rzs->backing_swap_name, | |
508 | O_RDWR | O_LARGEFILE, 0); | |
509 | if (IS_ERR(swap_file)) { | |
510 | pr_err("Error opening backing device: %s\n", | |
511 | rzs->backing_swap_name); | |
512 | ret = -EINVAL; | |
513 | goto out; | |
514 | } | |
515 | ||
516 | mapping = swap_file->f_mapping; | |
517 | inode = mapping->host; | |
518 | ||
519 | if (S_ISBLK(inode->i_mode)) { | |
520 | bdev = I_BDEV(inode); | |
521 | ret = bd_claim(bdev, setup_backing_swap); | |
522 | if (ret < 0) { | |
523 | bdev = NULL; | |
524 | goto bad_param; | |
525 | } | |
526 | disksize = i_size_read(inode); | |
527 | } else if (S_ISREG(inode->i_mode)) { | |
528 | bdev = inode->i_sb->s_bdev; | |
529 | if (IS_SWAPFILE(inode)) { | |
530 | ret = -EBUSY; | |
531 | goto bad_param; | |
532 | } | |
533 | ret = setup_backing_swap_extents(rzs, inode, &num_pages); | |
534 | if (ret < 0) | |
535 | goto bad_param; | |
536 | disksize = num_pages << PAGE_SHIFT; | |
537 | } else { | |
538 | goto bad_param; | |
539 | } | |
540 | ||
541 | rzs->swap_file = swap_file; | |
542 | rzs->backing_swap = bdev; | |
543 | rzs->disksize = disksize; | |
544 | BUG_ON(!rzs->disksize); | |
545 | ||
546 | return 0; | |
547 | ||
548 | bad_param: | |
549 | if (bdev) | |
550 | bd_release(bdev); | |
551 | filp_close(swap_file, NULL); | |
552 | ||
553 | out: | |
554 | rzs->backing_swap = NULL; | |
555 | return ret; | |
556 | } | |
557 | ||
/*
 * Map logical page number 'pagenum' to physical page number
 * on backing swap device. For block device, this is a nop.
 *
 * Uses the per-table-page cache built by map_backing_swap_extents():
 * table_page->mapping points at the extent covering the table page's
 * first entry and table_page->private is the offset already consumed
 * within that extent. When 'pagenum' lies past the cached extent we
 * walk forward through the extent list until enough pages are skipped.
 */
u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
{
	u32 skip_pages, entries_per_page;
	size_t delta, se_offset, skipped;
	struct page *table_page, *se_page;
	struct ramzswap_backing_extent *se;

	/* Block device backing: logical == physical. */
	if (!rzs->num_extents)
		return pagenum;

	entries_per_page = PAGE_SIZE / sizeof(*rzs->table);

	table_page = vmalloc_to_page(&rzs->table[pagenum]);
	se = (struct ramzswap_backing_extent *)table_page->mapping;
	se_page = virt_to_page(se);

	/* Offset of 'pagenum' within its table page. */
	skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
	se_offset = table_page->private + skip_pages;

	/* Fast path: target lies inside the cached extent. */
	if (se_offset < se->num_pages)
		return se->phy_pagenum + se_offset;

	/* Slow path: walk extents until we have skipped enough pages. */
	skipped = se->num_pages - table_page->private;
	do {
		struct ramzswap_backing_extent *se_base;
		u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);

		/* Get next swap extent */
		se_base = (struct ramzswap_backing_extent *)
				page_address(se_page);
		/* Last record in this extent page: hop to the next page
		 * of the extent list; otherwise just advance in place. */
		if (se - se_base == se_entries_per_page - 1) {
			se_page = list_entry(se_page->lru.next,
					struct page, lru);
			se = page_address(se_page);
		} else {
			se++;
		}

		skipped += se->num_pages;
	} while (skipped < skip_pages);

	/* 'delta' = pages of the final extent we overshot by. */
	delta = skipped - skip_pages;
	se_offset = se->num_pages - delta;

	return se->phy_pagenum + se_offset;
}
608 | ||
/*
 * Release the storage held for table entry 'index' and update stats.
 * Handles all three storage states: zero-filled (no memory), stored
 * uncompressed (whole page), and compressed (xvmalloc object).
 */
static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
{
	u32 clen;
	void *obj;

	struct page *page = rzs->table[index].page;
	u32 offset = rzs->table[index].offset;

	/* No page: either a zero-filled entry or nothing stored at all. */
	if (unlikely(!page)) {
		if (rzs_test_flag(rzs, index, RZS_ZERO)) {
			rzs_clear_flag(rzs, index, RZS_ZERO);
			stat_dec(rzs->stats.pages_zero);
		}
		return;
	}

	/* Incompressible page stored as a whole page: free it directly. */
	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
		clen = PAGE_SIZE;
		__free_page(page);
		rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
		stat_dec(rzs->stats.pages_expand);
		goto out;
	}

	/* Compressed object: read its size (minus header) for stats,
	 * then return it to the xvmalloc pool. */
	obj = kmap_atomic(page, KM_USER0) + offset;
	clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
	kunmap_atomic(obj, KM_USER0);

	xv_free(rzs->mem_pool, page, offset);
	if (clen <= PAGE_SIZE / 2)
		stat_dec(rzs->stats.good_compress);

out:
	rzs->stats.compr_size -= clen;
	stat_dec(rzs->stats.pages_stored);

	/* Mark the slot empty. */
	rzs->table[index].page = NULL;
	rzs->table[index].offset = 0;
}
648 | ||
649 | static int handle_zero_page(struct bio *bio) | |
650 | { | |
651 | void *user_mem; | |
652 | struct page *page = bio->bi_io_vec[0].bv_page; | |
653 | ||
654 | user_mem = kmap_atomic(page, KM_USER0); | |
655 | memset(user_mem, 0, PAGE_SIZE); | |
656 | kunmap_atomic(user_mem, KM_USER0); | |
657 | ||
658 | ramzswap_flush_dcache_page(page); | |
659 | ||
660 | set_bit(BIO_UPTODATE, &bio->bi_flags); | |
661 | bio_endio(bio, 0); | |
662 | return 0; | |
663 | } | |
664 | ||
665 | static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio) | |
666 | { | |
667 | u32 index; | |
668 | struct page *page; | |
669 | unsigned char *user_mem, *cmem; | |
670 | ||
671 | page = bio->bi_io_vec[0].bv_page; | |
672 | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; | |
673 | ||
674 | user_mem = kmap_atomic(page, KM_USER0); | |
675 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + | |
676 | rzs->table[index].offset; | |
677 | ||
678 | memcpy(user_mem, cmem, PAGE_SIZE); | |
679 | kunmap_atomic(user_mem, KM_USER0); | |
680 | kunmap_atomic(cmem, KM_USER1); | |
681 | ||
682 | ramzswap_flush_dcache_page(page); | |
683 | ||
684 | set_bit(BIO_UPTODATE, &bio->bi_flags); | |
685 | bio_endio(bio, 0); | |
686 | return 0; | |
687 | } | |
688 | ||
689 | ||
/*
 * Called when request page is not present in ramzswap.
 * Its either in backing swap device (if present) or
 * this is an attempt to read before any previous write
 * to this location - this happens due to readahead when
 * swap device is read from user-space (e.g. during swapon)
 *
 * Returns 1 when the bio was redirected to the backing device (caller
 * must resubmit it), 0 when the bio was completed here.
 */
static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
{
	/*
	 * Always forward such requests to backing swap
	 * device (if present)
	 */
	if (rzs->backing_swap) {
		u32 pagenum;
		/* This read is serviced by the backing device, so move the
		 * accounting from ramzswap reads to bdev reads. */
		stat_dec(rzs->stats.num_reads);
		stat_inc(rzs->stats.bdev_num_reads);
		bio->bi_bdev = rzs->backing_swap;

		/*
		 * In case backing swap is a file, find the right offset within
		 * the file corresponding to logical position 'index'. For block
		 * device, this is a nop.
		 */
		pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
		bio->bi_sector = map_backing_swap_page(rzs, pagenum)
					<< SECTORS_PER_PAGE_SHIFT;
		return 1;
	}

	/*
	 * Its unlikely event in case backing dev is
	 * not present
	 */
	pr_debug("Read before write on swap device: "
		"sector=%lu, size=%u, offset=%u\n",
		(ulong)(bio->bi_sector), bio->bi_size,
		bio->bi_io_vec[0].bv_offset);

	/* Do nothing. Just return success */
	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;
}
734 | ||
/*
 * Service a swap read. Dispatches on the stored form of the page:
 * zero-filled, absent (fault/backing swap), uncompressed, or LZO
 * compressed. Returns 0 when the bio was completed here, 1 when it was
 * redirected to the backing device and must be resubmitted.
 */
static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
{
	int ret;
	u32 index;
	size_t clen;
	struct page *page;
	struct zobj_header *zheader;
	unsigned char *user_mem, *cmem;

	stat_inc(rzs->stats.num_reads);

	page = bio->bi_io_vec[0].bv_page;
	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	if (rzs_test_flag(rzs, index, RZS_ZERO))
		return handle_zero_page(bio);

	/* Requested page is not present in compressed area */
	if (!rzs->table[index].page)
		return handle_ramzswap_fault(rzs, bio);

	/* Page is stored uncompressed since its incompressible */
	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
		return handle_uncompressed_page(rzs, bio);

	user_mem = kmap_atomic(page, KM_USER0);
	/* Output buffer size in, decompressed length out. */
	clen = PAGE_SIZE;

	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
		rzs->table[index].offset;

	/* Stored object = zobj_header followed by compressed data. */
	ret = lzo1x_decompress_safe(
		cmem + sizeof(*zheader),
		xv_get_object_size(cmem) - sizeof(*zheader),
		user_mem, &clen);

	kunmap_atomic(user_mem, KM_USER0);
	kunmap_atomic(cmem, KM_USER1);

	/* should NEVER happen */
	if (unlikely(ret != LZO_E_OK)) {
		pr_err("Decompression failed! err=%d, page=%u\n",
			ret, index);
		stat_inc(rzs->stats.failed_reads);
		goto out;
	}

	ramzswap_flush_dcache_page(page);

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;

out:
	bio_io_error(bio);
	return 0;
}
792 | ||
/*
 * Service a swap write. Frees any previous data at the slot, then
 * stores the page as zero-filled (no memory), LZO compressed (xvmalloc
 * object), or uncompressed (whole page) - or forwards it to the backing
 * swap device when the memlimit is hit or the page is incompressible.
 * rzs->lock serializes use of the shared compress buffer and pool.
 * Returns 0 when the bio was completed here, 1 when it was redirected
 * to the backing device and must be resubmitted.
 */
static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
{
	int ret, fwd_write_request = 0;
	u32 offset, index;
	size_t clen;
	struct zobj_header *zheader;
	struct page *page, *page_store;
	unsigned char *user_mem, *cmem, *src;

	stat_inc(rzs->stats.num_writes);

	page = bio->bi_io_vec[0].bv_page;
	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	src = rzs->compress_buffer;

	/*
	 * System swaps to same sector again when the stored page
	 * is no longer referenced by any process. So, its now safe
	 * to free the memory that was allocated for this page.
	 */
	if (rzs->table[index].page)
		ramzswap_free_page(rzs, index);

	/*
	 * No memory is allocated for zero filled pages.
	 * Simply clear zero page flag.
	 */
	if (rzs_test_flag(rzs, index, RZS_ZERO)) {
		stat_dec(rzs->stats.pages_zero);
		rzs_clear_flag(rzs, index, RZS_ZERO);
	}

	mutex_lock(&rzs->lock);

	user_mem = kmap_atomic(page, KM_USER0);
	/* Zero-filled pages cost nothing: just set the flag. */
	if (page_zero_filled(user_mem)) {
		kunmap_atomic(user_mem, KM_USER0);
		mutex_unlock(&rzs->lock);
		stat_inc(rzs->stats.pages_zero);
		rzs_set_flag(rzs, index, RZS_ZERO);

		set_bit(BIO_UPTODATE, &bio->bi_flags);
		bio_endio(bio, 0);
		return 0;
	}

	/* Compressed-memory budget exhausted: spill to backing swap. */
	if (rzs->backing_swap &&
		(rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
		kunmap_atomic(user_mem, KM_USER0);
		mutex_unlock(&rzs->lock);
		fwd_write_request = 1;
		goto out;
	}

	ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
				rzs->compress_workmem);

	kunmap_atomic(user_mem, KM_USER0);

	if (unlikely(ret != LZO_E_OK)) {
		mutex_unlock(&rzs->lock);
		pr_err("Compression failed! err=%d\n", ret);
		stat_inc(rzs->stats.failed_writes);
		goto out;
	}

	/*
	 * Page is incompressible. Forward it to backing swap
	 * if present. Otherwise, store it as-is (uncompressed)
	 * since we do not want to return too many swap write
	 * errors which has side effect of hanging the system.
	 */
	if (unlikely(clen > max_zpage_size)) {
		if (rzs->backing_swap) {
			mutex_unlock(&rzs->lock);
			fwd_write_request = 1;
			goto out;
		}

		clen = PAGE_SIZE;
		page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
		if (unlikely(!page_store)) {
			mutex_unlock(&rzs->lock);
			pr_info("Error allocating memory for incompressible "
				"page: %u\n", index);
			stat_inc(rzs->stats.failed_writes);
			goto out;
		}

		offset = 0;
		rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
		stat_inc(rzs->stats.pages_expand);
		rzs->table[index].page = page_store;
		/* Copy straight from the user page, not compress_buffer. */
		src = kmap_atomic(page, KM_USER0);
		goto memstore;
	}

	if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
			&rzs->table[index].page, &offset,
			GFP_NOIO | __GFP_HIGHMEM)) {
		mutex_unlock(&rzs->lock);
		pr_info("Error allocating memory for compressed "
			"page: %u, size=%zu\n", index, clen);
		stat_inc(rzs->stats.failed_writes);
		/* Allocation failure is recoverable if we can spill. */
		if (rzs->backing_swap)
			fwd_write_request = 1;
		goto out;
	}

memstore:
	rzs->table[index].offset = offset;

	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
		rzs->table[index].offset;

#if 0
	/* Back-reference needed for memory defragmentation */
	if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
		zheader = (struct zobj_header *)cmem;
		zheader->table_idx = index;
		cmem += sizeof(*zheader);
	}
#endif

	memcpy(cmem, src, clen);

	kunmap_atomic(cmem, KM_USER1);
	/* 'src' is a kmap of the user page only in the uncompressed path. */
	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
		kunmap_atomic(src, KM_USER0);

	/* Update stats */
	rzs->stats.compr_size += clen;
	stat_inc(rzs->stats.pages_stored);
	if (clen <= PAGE_SIZE / 2)
		stat_inc(rzs->stats.good_compress);

	mutex_unlock(&rzs->lock);

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;

out:
	if (fwd_write_request) {
		stat_inc(rzs->stats.bdev_num_writes);
		bio->bi_bdev = rzs->backing_swap;
#if 0
		/*
		 * TODO: We currently have linear mapping of ramzswap and
		 * backing swap sectors. This is not desired since we want
		 * to optimize writes to backing swap to minimize disk seeks
		 * or have effective wear leveling (for SSDs). Also, a
		 * non-linear mapping is required to implement compressed
		 * on-disk swapping.
		 */
		bio->bi_sector = get_backing_swap_page()
					<< SECTORS_PER_PAGE_SHIFT;
#endif
		/*
		 * In case backing swap is a file, find the right offset within
		 * the file corresponding to logical position 'index'. For block
		 * device, this is a nop.
		 */
		bio->bi_sector = map_backing_swap_page(rzs, index)
					<< SECTORS_PER_PAGE_SHIFT;
		return 1;
	}

	bio_io_error(bio);
	return 0;
}
965 | ||
966 | ||
967 | /* | |
968 | * Check if request is within bounds and page aligned. | |
969 | */ | |
970 | static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio) | |
971 | { | |
972 | if (unlikely( | |
973 | (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) || | |
974 | (bio->bi_sector & (SECTORS_PER_PAGE - 1)) || | |
975 | (bio->bi_vcnt != 1) || | |
976 | (bio->bi_size != PAGE_SIZE) || | |
977 | (bio->bi_io_vec[0].bv_offset != 0))) { | |
978 | ||
979 | return 0; | |
980 | } | |
981 | ||
982 | /* swap request is valid */ | |
983 | return 1; | |
984 | } | |
985 | ||
986 | /* | |
987 | * Handler function for all ramzswap I/O requests. | |
988 | */ | |
989 | static int ramzswap_make_request(struct request_queue *queue, struct bio *bio) | |
990 | { | |
991 | int ret = 0; | |
992 | struct ramzswap *rzs = queue->queuedata; | |
993 | ||
994 | if (unlikely(!rzs->init_done)) { | |
995 | bio_io_error(bio); | |
996 | return 0; | |
997 | } | |
998 | ||
999 | if (!valid_swap_request(rzs, bio)) { | |
1000 | stat_inc(rzs->stats.invalid_io); | |
1001 | bio_io_error(bio); | |
1002 | return 0; | |
1003 | } | |
1004 | ||
1005 | switch (bio_data_dir(bio)) { | |
1006 | case READ: | |
1007 | ret = ramzswap_read(rzs, bio); | |
1008 | break; | |
1009 | ||
1010 | case WRITE: | |
1011 | ret = ramzswap_write(rzs, bio); | |
1012 | break; | |
1013 | } | |
1014 | ||
1015 | return ret; | |
1016 | } | |
1017 | ||
/*
 * Release every resource held by one ramzswap device and return it to
 * the uninitialized state (the inverse of ramzswap_ioctl_init_device).
 *
 * NOTE(review): callers (ioctl reset, module exit) appear to guarantee
 * no I/O is in flight — this function touches the table and mem_pool
 * without taking rzs->lock; confirm against call sites.
 */
static void reset_device(struct ramzswap *rzs)
{
	int is_backing_blkdev = 0;
	size_t index, num_pages;
	unsigned entries_per_page;
	unsigned long num_table_pages, entry = 0;

	/*
	 * Extents are only built for file-backed swap; a backing swap
	 * with no extents was therefore claimed as a raw block device
	 * and must be bd_release()d below.
	 */
	if (rzs->backing_swap && !rzs->num_extents)
		is_backing_blkdev = 1;

	num_pages = rzs->disksize >> PAGE_SHIFT;

	/* Free various per-device buffers */
	kfree(rzs->compress_workmem);
	free_pages((unsigned long)rzs->compress_buffer, 1);

	rzs->compress_workmem = NULL;
	rzs->compress_buffer = NULL;

	/* Free all pages that are still in this ramzswap device */
	for (index = 0; index < num_pages; index++) {
		struct page *page;
		u16 offset;

		page = rzs->table[index].page;
		offset = rzs->table[index].offset;

		if (!page)
			continue;

		/*
		 * Uncompressed entries own a whole page; compressed
		 * entries live inside the xvmalloc pool at
		 * (page, offset).
		 */
		if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
			__free_page(page);
		else
			xv_free(rzs->mem_pool, page, offset);
	}

	entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
	num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
					PAGE_SIZE);
	/*
	 * Set page->mapping to NULL for every table page.
	 * Otherwise, we will hit bad_page() during free.
	 */
	while (rzs->num_extents && num_table_pages--) {
		struct page *page;
		page = vmalloc_to_page(&rzs->table[entry]);
		page->mapping = NULL;
		entry += entries_per_page;
	}
	vfree(rzs->table);
	rzs->table = NULL;

	xv_destroy_pool(rzs->mem_pool);
	rzs->mem_pool = NULL;

	/* Free all swap extent pages */
	while (!list_empty(&rzs->backing_swap_extent_list)) {
		struct page *page;
		/* NOTE(review): shadows the outer 'entry' counter */
		struct list_head *entry;
		entry = rzs->backing_swap_extent_list.next;
		page = list_entry(entry, struct page, lru);
		list_del(entry);
		__free_page(page);
	}
	INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
	rzs->num_extents = 0;

	/* Close backing swap device, if present */
	if (rzs->backing_swap) {
		/* Drop the exclusive claim taken on a raw block device */
		if (is_backing_blkdev)
			bd_release(rzs->backing_swap);
		filp_close(rzs->swap_file, NULL);
		rzs->backing_swap = NULL;
	}

	/* Reset stats */
	memset(&rzs->stats, 0, sizeof(rzs->stats));

	rzs->disksize = 0;
	rzs->memlimit = 0;

	/* Back to uninitialized state */
	rzs->init_done = 0;
}
1102 | ||
/*
 * One-time device initialization (RZSIO_INIT ioctl).
 *
 * Sets up the optional backing swap, allocates compression scratch
 * memory, the per-page mapping table and the xvmalloc pool, writes a
 * swap header into logical page 0 and publishes the disk capacity.
 * On any failure, reset_device() tears down whatever was built.
 *
 * Returns 0 on success, -EBUSY if already initialized, or a negative
 * errno from a failed allocation/setup step.
 */
static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
{
	int ret;
	size_t num_pages;
	struct page *page;
	union swap_header *swap_header;

	if (rzs->init_done) {
		pr_info("Device already initialized!\n");
		return -EBUSY;
	}

	ret = setup_backing_swap(rzs);
	if (ret)
		goto fail;

	/*
	 * With a backing device only the RAM usage needs a cap
	 * (disksize follows the backing device); standalone mode needs
	 * a default disksize instead. Both default from total RAM.
	 */
	if (rzs->backing_swap)
		ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
	else
		ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);

	rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
	if (!rzs->compress_workmem) {
		pr_err("Error allocating compressor working memory!\n");
		ret = -ENOMEM;
		goto fail;
	}

	/*
	 * Order-1 (two pages) — presumably because compressed output
	 * can exceed PAGE_SIZE for incompressible data; TODO confirm
	 * against the compressor's worst-case bound.
	 */
	rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
	if (!rzs->compress_buffer) {
		pr_err("Error allocating compressor buffer space\n");
		ret = -ENOMEM;
		goto fail;
	}

	num_pages = rzs->disksize >> PAGE_SHIFT;
	rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
	if (!rzs->table) {
		pr_err("Error allocating ramzswap address table\n");
		/* To prevent accessing table entries during cleanup */
		rzs->disksize = 0;
		ret = -ENOMEM;
		goto fail;
	}
	memset(rzs->table, 0, num_pages * sizeof(*rzs->table));

	map_backing_swap_extents(rzs);

	/* Page 0 holds the swap header, stored uncompressed */
	page = alloc_page(__GFP_ZERO);
	if (!page) {
		pr_err("Error allocating swap header page\n");
		ret = -ENOMEM;
		goto fail;
	}
	rzs->table[0].page = page;
	rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);

	swap_header = kmap(page);
	ret = setup_swap_header(rzs, swap_header);
	kunmap(page);
	if (ret) {
		pr_err("Error setting swap header\n");
		goto fail;
	}

	set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);

	/*
	 * We have ident mapping of sectors for ramzswap and
	 * and the backing swap device. So, this queue flag
	 * should be according to backing dev.
	 */
	if (!rzs->backing_swap ||
			blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);

	rzs->mem_pool = xv_create_pool();
	if (!rzs->mem_pool) {
		pr_err("Error creating memory pool\n");
		ret = -ENOMEM;
		goto fail;
	}

	/*
	 * Pages that compress to size greater than this are forwarded
	 * to physical swap disk (if backing dev is provided)
	 * TODO: make this configurable
	 */
	if (rzs->backing_swap)
		max_zpage_size = max_zpage_size_bdev;
	else
		max_zpage_size = max_zpage_size_nobdev;
	pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);

	rzs->init_done = 1;

	pr_debug("Initialization done!\n");
	return 0;

fail:
	reset_device(rzs);

	pr_err("Initialization failed: err=%d\n", ret);
	return ret;
}
1208 | ||
1209 | static int ramzswap_ioctl_reset_device(struct ramzswap *rzs) | |
1210 | { | |
1211 | if (rzs->init_done) | |
1212 | reset_device(rzs); | |
1213 | ||
1214 | return 0; | |
1215 | } | |
1216 | ||
1217 | static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode, | |
1218 | unsigned int cmd, unsigned long arg) | |
1219 | { | |
1220 | int ret = 0; | |
1221 | size_t disksize_kb, memlimit_kb; | |
1222 | ||
1223 | struct ramzswap *rzs = bdev->bd_disk->private_data; | |
1224 | ||
1225 | switch (cmd) { | |
1226 | case RZSIO_SET_DISKSIZE_KB: | |
1227 | if (rzs->init_done) { | |
1228 | ret = -EBUSY; | |
1229 | goto out; | |
1230 | } | |
1231 | if (copy_from_user(&disksize_kb, (void *)arg, | |
1232 | _IOC_SIZE(cmd))) { | |
1233 | ret = -EFAULT; | |
1234 | goto out; | |
1235 | } | |
1236 | rzs->disksize = disksize_kb << 10; | |
1237 | pr_info("Disk size set to %zu kB\n", disksize_kb); | |
1238 | break; | |
1239 | ||
1240 | case RZSIO_SET_MEMLIMIT_KB: | |
1241 | if (rzs->init_done) { | |
1242 | /* TODO: allow changing memlimit */ | |
1243 | ret = -EBUSY; | |
1244 | goto out; | |
1245 | } | |
1246 | if (copy_from_user(&memlimit_kb, (void *)arg, | |
1247 | _IOC_SIZE(cmd))) { | |
1248 | ret = -EFAULT; | |
1249 | goto out; | |
1250 | } | |
1251 | rzs->memlimit = memlimit_kb << 10; | |
1252 | pr_info("Memory limit set to %zu kB\n", memlimit_kb); | |
1253 | break; | |
1254 | ||
1255 | case RZSIO_SET_BACKING_SWAP: | |
1256 | if (rzs->init_done) { | |
1257 | ret = -EBUSY; | |
1258 | goto out; | |
1259 | } | |
1260 | ||
1261 | if (copy_from_user(&rzs->backing_swap_name, (void *)arg, | |
1262 | _IOC_SIZE(cmd))) { | |
1263 | ret = -EFAULT; | |
1264 | goto out; | |
1265 | } | |
1266 | rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0'; | |
1267 | pr_info("Backing swap set to %s\n", rzs->backing_swap_name); | |
1268 | break; | |
1269 | ||
1270 | case RZSIO_GET_STATS: | |
1271 | { | |
1272 | struct ramzswap_ioctl_stats *stats; | |
1273 | if (!rzs->init_done) { | |
1274 | ret = -ENOTTY; | |
1275 | goto out; | |
1276 | } | |
1277 | stats = kzalloc(sizeof(*stats), GFP_KERNEL); | |
1278 | if (!stats) { | |
1279 | ret = -ENOMEM; | |
1280 | goto out; | |
1281 | } | |
1282 | ramzswap_ioctl_get_stats(rzs, stats); | |
1283 | if (copy_to_user((void *)arg, stats, sizeof(*stats))) { | |
1284 | kfree(stats); | |
1285 | ret = -EFAULT; | |
1286 | goto out; | |
1287 | } | |
1288 | kfree(stats); | |
1289 | break; | |
1290 | } | |
1291 | case RZSIO_INIT: | |
1292 | ret = ramzswap_ioctl_init_device(rzs); | |
1293 | break; | |
1294 | ||
1295 | case RZSIO_RESET: | |
1296 | /* Do not reset an active device! */ | |
1297 | if (bdev->bd_holders) { | |
1298 | ret = -EBUSY; | |
1299 | goto out; | |
1300 | } | |
1301 | ret = ramzswap_ioctl_reset_device(rzs); | |
1302 | break; | |
1303 | ||
1304 | default: | |
1305 | pr_info("Invalid ioctl %u\n", cmd); | |
1306 | ret = -ENOTTY; | |
1307 | } | |
1308 | ||
1309 | out: | |
1310 | return ret; | |
1311 | } | |
1312 | ||
1313 | static struct block_device_operations ramzswap_devops = { | |
1314 | .ioctl = ramzswap_ioctl, | |
1315 | .owner = THIS_MODULE, | |
1316 | }; | |
1317 | ||
/*
 * Allocate and register the request queue and gendisk for one device.
 *
 * On allocation failure, logs and returns with the corresponding
 * rzs fields left NULL; destroy_device() tolerates that partial state.
 * NOTE(review): failures are not reported to the caller — the module
 * loads even if some devices failed to come up.
 */
static void create_device(struct ramzswap *rzs, int device_id)
{
	mutex_init(&rzs->lock);
	INIT_LIST_HEAD(&rzs->backing_swap_extent_list);

	rzs->queue = blk_alloc_queue(GFP_KERNEL);
	if (!rzs->queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		return;
	}

	/* All I/O goes through ramzswap_make_request, not a request_fn */
	blk_queue_make_request(rzs->queue, ramzswap_make_request);
	rzs->queue->queuedata = rzs;

	 /* gendisk structure */
	rzs->disk = alloc_disk(1);
	if (!rzs->disk) {
		blk_cleanup_queue(rzs->queue);
		pr_warning("Error allocating disk structure for device %d\n",
			device_id);
		return;
	}

	/* Fill in the gendisk before add_disk() makes it visible */
	rzs->disk->major = ramzswap_major;
	rzs->disk->first_minor = device_id;
	rzs->disk->fops = &ramzswap_devops;
	rzs->disk->queue = rzs->queue;
	rzs->disk->private_data = rzs;
	snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);

	/*
	 * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
	 * or set equal to backing swap device (if provided)
	 */
	set_capacity(rzs->disk, 0);
	add_disk(rzs->disk);

	rzs->init_done = 0;
}
1358 | ||
1359 | static void destroy_device(struct ramzswap *rzs) | |
1360 | { | |
1361 | if (rzs->disk) { | |
1362 | del_gendisk(rzs->disk); | |
1363 | put_disk(rzs->disk); | |
1364 | } | |
1365 | ||
1366 | if (rzs->queue) | |
1367 | blk_cleanup_queue(rzs->queue); | |
1368 | } | |
1369 | ||
1370 | static int __init ramzswap_init(void) | |
1371 | { | |
1372 | int i, ret; | |
1373 | ||
1374 | if (num_devices > max_num_devices) { | |
1375 | pr_warning("Invalid value for num_devices: %u\n", | |
1376 | num_devices); | |
1377 | return -EINVAL; | |
1378 | } | |
1379 | ||
1380 | ramzswap_major = register_blkdev(0, "ramzswap"); | |
1381 | if (ramzswap_major <= 0) { | |
1382 | pr_warning("Unable to get major number\n"); | |
1383 | return -EBUSY; | |
1384 | } | |
1385 | ||
1386 | if (!num_devices) { | |
1387 | pr_info("num_devices not specified. Using default: 1\n"); | |
1388 | num_devices = 1; | |
1389 | } | |
1390 | ||
1391 | /* Allocate the device array and initialize each one */ | |
1392 | pr_info("Creating %u devices ...\n", num_devices); | |
1393 | devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL); | |
1394 | if (!devices) { | |
1395 | ret = -ENOMEM; | |
1396 | goto out; | |
1397 | } | |
1398 | ||
1399 | for (i = 0; i < num_devices; i++) | |
1400 | create_device(&devices[i], i); | |
1401 | ||
1402 | return 0; | |
1403 | out: | |
1404 | unregister_blkdev(ramzswap_major, "ramzswap"); | |
1405 | return ret; | |
1406 | } | |
1407 | ||
1408 | static void __exit ramzswap_exit(void) | |
1409 | { | |
1410 | int i; | |
1411 | struct ramzswap *rzs; | |
1412 | ||
1413 | for (i = 0; i < num_devices; i++) { | |
1414 | rzs = &devices[i]; | |
1415 | ||
1416 | destroy_device(rzs); | |
1417 | if (rzs->init_done) | |
1418 | reset_device(rzs); | |
1419 | } | |
1420 | ||
1421 | unregister_blkdev(ramzswap_major, "ramzswap"); | |
1422 | ||
1423 | kfree(devices); | |
1424 | pr_debug("Cleanup done!\n"); | |
1425 | } | |
1426 | ||
/*
 * Module parameter: number of ramzswap devices to create at load time.
 * 0 (the default) is treated as 1 by ramzswap_init(). Perm 0: not
 * visible/settable via sysfs.
 */
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");

module_init(ramzswap_init);
module_exit(ramzswap_exit);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Based Swap Device");