Commit | Line | Data |
---|---|---|
306b0c95 NG |
1 | /* |
2 | * Compressed RAM based swap device | |
3 | * | |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
306b0c95 NG |
5 | * |
6 | * This code is released using a dual license strategy: BSD/GPL | |
7 | * You can choose the licence that better fits your requirements. | |
8 | * | |
9 | * Released under the terms of 3-clause BSD License | |
10 | * Released under the terms of GNU General Public License Version 2.0 | |
11 | * | |
12 | * Project home: http://compcache.googlecode.com | |
13 | */ | |
14 | ||
15 | #define KMSG_COMPONENT "ramzswap" | |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
20 | #include <linux/bitops.h> | |
21 | #include <linux/blkdev.h> | |
22 | #include <linux/buffer_head.h> | |
23 | #include <linux/device.h> | |
24 | #include <linux/genhd.h> | |
25 | #include <linux/highmem.h> | |
5a0e3ad6 | 26 | #include <linux/slab.h> |
306b0c95 | 27 | #include <linux/lzo.h> |
306b0c95 NG |
28 | #include <linux/string.h> |
29 | #include <linux/swap.h> | |
30 | #include <linux/swapops.h> | |
31 | #include <linux/vmalloc.h> | |
306b0c95 NG |
32 | |
33 | #include "ramzswap_drv.h" | |
34 | ||
35 | /* Globals: block major obtained from register_blkdev() and the array of num_devices devices allocated in ramzswap_init() */ | |
36 | static int ramzswap_major; | |
37 | static struct ramzswap *devices; | |
38 | ||
306b0c95 NG |
39 | /* Module params (documentation at end); a value of 0 makes ramzswap_init() fall back to one device */ |
40 | static unsigned int num_devices; | |
41 | ||
42 | static int rzs_test_flag(struct ramzswap *rzs, u32 index, | |
43 | enum rzs_pageflags flag) | |
44 | { | |
45 | return rzs->table[index].flags & BIT(flag); | |
46 | } | |
47 | ||
48 | static void rzs_set_flag(struct ramzswap *rzs, u32 index, | |
49 | enum rzs_pageflags flag) | |
50 | { | |
51 | rzs->table[index].flags |= BIT(flag); | |
52 | } | |
53 | ||
54 | static void rzs_clear_flag(struct ramzswap *rzs, u32 index, | |
55 | enum rzs_pageflags flag) | |
56 | { | |
57 | rzs->table[index].flags &= ~BIT(flag); | |
58 | } | |
59 | ||
/*
 * Check whether a page consists only of zero bytes.
 * @ptr: start of a PAGE_SIZE buffer, scanned one unsigned long at a time.
 *
 * Returns 1 if every word is zero, 0 as soon as a non-zero word is found.
 */
static int page_zero_filled(void *ptr)
{
	unsigned long *word = (unsigned long *)ptr;
	unsigned long *end = word + PAGE_SIZE / sizeof(*word);

	while (word != end) {
		if (*word)
			return 0;
		word++;
	}

	return 1;
}
74 | ||
306b0c95 NG |
75 | static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes) |
76 | { | |
77 | if (!rzs->disksize) { | |
78 | pr_info( | |
79 | "disk size not provided. You can use disksize_kb module " | |
80 | "param to specify size.\nUsing default: (%u%% of RAM).\n", | |
81 | default_disksize_perc_ram | |
82 | ); | |
83 | rzs->disksize = default_disksize_perc_ram * | |
84 | (totalram_bytes / 100); | |
85 | } | |
86 | ||
87 | if (rzs->disksize > 2 * (totalram_bytes)) { | |
88 | pr_info( | |
89 | "There is little point creating a ramzswap of greater than " | |
90 | "twice the size of memory since we expect a 2:1 compression " | |
91 | "ratio. Note that ramzswap uses about 0.1%% of the size of " | |
92 | "the swap device when not in use so a huge ramzswap is " | |
93 | "wasteful.\n" | |
94 | "\tMemory Size: %zu kB\n" | |
95 | "\tSize you selected: %zu kB\n" | |
96 | "Continuing anyway ...\n", | |
97 | totalram_bytes >> 10, rzs->disksize | |
98 | ); | |
99 | } | |
100 | ||
101 | rzs->disksize &= PAGE_MASK; | |
102 | } | |
103 | ||
c25d75a2 | 104 | static void ramzswap_ioctl_get_stats(struct ramzswap *rzs, |
306b0c95 NG |
105 | struct ramzswap_ioctl_stats *s) |
106 | { | |
306b0c95 | 107 | s->disksize = rzs->disksize; |
306b0c95 NG |
108 | |
109 | #if defined(CONFIG_RAMZSWAP_STATS) | |
110 | { | |
111 | struct ramzswap_stats *rs = &rzs->stats; | |
112 | size_t succ_writes, mem_used; | |
113 | unsigned int good_compress_perc = 0, no_compress_perc = 0; | |
114 | ||
115 | mem_used = xv_get_total_size_bytes(rzs->mem_pool) | |
116 | + (rs->pages_expand << PAGE_SHIFT); | |
6a907728 NG |
117 | succ_writes = rzs_stat64_read(rzs, &rs->num_writes) - |
118 | rzs_stat64_read(rzs, &rs->failed_writes); | |
306b0c95 NG |
119 | |
120 | if (succ_writes && rs->pages_stored) { | |
121 | good_compress_perc = rs->good_compress * 100 | |
122 | / rs->pages_stored; | |
123 | no_compress_perc = rs->pages_expand * 100 | |
124 | / rs->pages_stored; | |
125 | } | |
126 | ||
6a907728 NG |
127 | s->num_reads = rzs_stat64_read(rzs, &rs->num_reads); |
128 | s->num_writes = rzs_stat64_read(rzs, &rs->num_writes); | |
129 | s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads); | |
130 | s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes); | |
131 | s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io); | |
132 | s->notify_free = rzs_stat64_read(rzs, &rs->notify_free); | |
306b0c95 NG |
133 | s->pages_zero = rs->pages_zero; |
134 | ||
135 | s->good_compress_pct = good_compress_perc; | |
136 | s->pages_expand_pct = no_compress_perc; | |
137 | ||
138 | s->pages_stored = rs->pages_stored; | |
139 | s->pages_used = mem_used >> PAGE_SHIFT; | |
140 | s->orig_data_size = rs->pages_stored << PAGE_SHIFT; | |
141 | s->compr_data_size = rs->compr_size; | |
142 | s->mem_used_total = mem_used; | |
306b0c95 NG |
143 | } |
144 | #endif /* CONFIG_RAMZSWAP_STATS */ | |
145 | } | |
146 | ||
306b0c95 NG |
147 | static void ramzswap_free_page(struct ramzswap *rzs, size_t index) |
148 | { | |
149 | u32 clen; | |
150 | void *obj; | |
151 | ||
152 | struct page *page = rzs->table[index].page; | |
153 | u32 offset = rzs->table[index].offset; | |
154 | ||
155 | if (unlikely(!page)) { | |
2e882281 NG |
156 | /* |
157 | * No memory is allocated for zero filled pages. | |
158 | * Simply clear zero page flag. | |
159 | */ | |
306b0c95 NG |
160 | if (rzs_test_flag(rzs, index, RZS_ZERO)) { |
161 | rzs_clear_flag(rzs, index, RZS_ZERO); | |
6a907728 | 162 | rzs_stat_dec(&rzs->stats.pages_zero); |
306b0c95 NG |
163 | } |
164 | return; | |
165 | } | |
166 | ||
167 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) { | |
168 | clen = PAGE_SIZE; | |
169 | __free_page(page); | |
170 | rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED); | |
6a907728 | 171 | rzs_stat_dec(&rzs->stats.pages_expand); |
306b0c95 NG |
172 | goto out; |
173 | } | |
174 | ||
175 | obj = kmap_atomic(page, KM_USER0) + offset; | |
176 | clen = xv_get_object_size(obj) - sizeof(struct zobj_header); | |
177 | kunmap_atomic(obj, KM_USER0); | |
178 | ||
179 | xv_free(rzs->mem_pool, page, offset); | |
180 | if (clen <= PAGE_SIZE / 2) | |
6a907728 | 181 | rzs_stat_dec(&rzs->stats.good_compress); |
306b0c95 NG |
182 | |
183 | out: | |
184 | rzs->stats.compr_size -= clen; | |
6a907728 | 185 | rzs_stat_dec(&rzs->stats.pages_stored); |
306b0c95 NG |
186 | |
187 | rzs->table[index].page = NULL; | |
188 | rzs->table[index].offset = 0; | |
189 | } | |
190 | ||
a1dd52af | 191 | static void handle_zero_page(struct page *page) |
306b0c95 NG |
192 | { |
193 | void *user_mem; | |
306b0c95 NG |
194 | |
195 | user_mem = kmap_atomic(page, KM_USER0); | |
196 | memset(user_mem, 0, PAGE_SIZE); | |
197 | kunmap_atomic(user_mem, KM_USER0); | |
198 | ||
30fb8a71 | 199 | flush_dcache_page(page); |
306b0c95 NG |
200 | } |
201 | ||
a1dd52af NG |
202 | static void handle_uncompressed_page(struct ramzswap *rzs, |
203 | struct page *page, u32 index) | |
306b0c95 | 204 | { |
306b0c95 NG |
205 | unsigned char *user_mem, *cmem; |
206 | ||
306b0c95 NG |
207 | user_mem = kmap_atomic(page, KM_USER0); |
208 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + | |
209 | rzs->table[index].offset; | |
210 | ||
211 | memcpy(user_mem, cmem, PAGE_SIZE); | |
212 | kunmap_atomic(user_mem, KM_USER0); | |
213 | kunmap_atomic(cmem, KM_USER1); | |
214 | ||
30fb8a71 | 215 | flush_dcache_page(page); |
306b0c95 NG |
216 | } |
217 | ||
218 | static int ramzswap_read(struct ramzswap *rzs, struct bio *bio) | |
219 | { | |
a1dd52af NG |
220 | |
221 | int i; | |
306b0c95 | 222 | u32 index; |
a1dd52af | 223 | struct bio_vec *bvec; |
306b0c95 | 224 | |
6a907728 | 225 | rzs_stat64_inc(rzs, &rzs->stats.num_reads); |
306b0c95 | 226 | |
306b0c95 | 227 | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; |
a1dd52af NG |
228 | bio_for_each_segment(bvec, bio, i) { |
229 | int ret; | |
230 | size_t clen; | |
231 | struct page *page; | |
232 | struct zobj_header *zheader; | |
233 | unsigned char *user_mem, *cmem; | |
306b0c95 | 234 | |
a1dd52af | 235 | page = bvec->bv_page; |
306b0c95 | 236 | |
a1dd52af NG |
237 | if (rzs_test_flag(rzs, index, RZS_ZERO)) { |
238 | handle_zero_page(page); | |
239 | continue; | |
240 | } | |
306b0c95 | 241 | |
a1dd52af NG |
242 | /* Requested page is not present in compressed area */ |
243 | if (unlikely(!rzs->table[index].page)) { | |
244 | pr_debug("Read before write: sector=%lu, size=%u", | |
245 | (ulong)(bio->bi_sector), bio->bi_size); | |
246 | /* Do nothing */ | |
247 | continue; | |
248 | } | |
306b0c95 | 249 | |
a1dd52af NG |
250 | /* Page is stored uncompressed since it's incompressible */ |
251 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) { | |
252 | handle_uncompressed_page(rzs, page, index); | |
253 | continue; | |
254 | } | |
306b0c95 | 255 | |
a1dd52af NG |
256 | user_mem = kmap_atomic(page, KM_USER0); |
257 | clen = PAGE_SIZE; | |
306b0c95 | 258 | |
a1dd52af NG |
259 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
260 | rzs->table[index].offset; | |
306b0c95 | 261 | |
a1dd52af NG |
262 | ret = lzo1x_decompress_safe( |
263 | cmem + sizeof(*zheader), | |
264 | xv_get_object_size(cmem) - sizeof(*zheader), | |
265 | user_mem, &clen); | |
306b0c95 | 266 | |
a1dd52af NG |
267 | kunmap_atomic(user_mem, KM_USER0); |
268 | kunmap_atomic(cmem, KM_USER1); | |
306b0c95 | 269 | |
a1dd52af NG |
270 | /* Should NEVER happen. Return bio error if it does. */ |
271 | if (unlikely(ret != LZO_E_OK)) { | |
272 | pr_err("Decompression failed! err=%d, page=%u\n", | |
273 | ret, index); | |
274 | rzs_stat64_inc(rzs, &rzs->stats.failed_reads); | |
275 | goto out; | |
276 | } | |
277 | ||
278 | flush_dcache_page(page); | |
279 | index++; | |
280 | } | |
306b0c95 NG |
281 | |
282 | set_bit(BIO_UPTODATE, &bio->bi_flags); | |
283 | bio_endio(bio, 0); | |
284 | return 0; | |
285 | ||
286 | out: | |
287 | bio_io_error(bio); | |
288 | return 0; | |
289 | } | |
290 | ||
291 | static int ramzswap_write(struct ramzswap *rzs, struct bio *bio) | |
292 | { | |
a1dd52af NG |
293 | int i; |
294 | u32 index; | |
295 | struct bio_vec *bvec; | |
306b0c95 | 296 | |
6a907728 | 297 | rzs_stat64_inc(rzs, &rzs->stats.num_writes); |
306b0c95 | 298 | |
306b0c95 NG |
299 | index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; |
300 | ||
a1dd52af NG |
301 | bio_for_each_segment(bvec, bio, i) { |
302 | int ret; | |
303 | u32 offset; | |
304 | size_t clen; | |
305 | struct zobj_header *zheader; | |
306 | struct page *page, *page_store; | |
307 | unsigned char *user_mem, *cmem, *src; | |
306b0c95 | 308 | |
a1dd52af NG |
309 | page = bvec->bv_page; |
310 | src = rzs->compress_buffer; | |
306b0c95 | 311 | |
a1dd52af NG |
312 | /* |
313 | * System overwrites unused sectors. Free memory associated | |
314 | * with this sector now. | |
315 | */ | |
316 | if (rzs->table[index].page || | |
317 | rzs_test_flag(rzs, index, RZS_ZERO)) | |
318 | ramzswap_free_page(rzs, index); | |
306b0c95 | 319 | |
a1dd52af | 320 | mutex_lock(&rzs->lock); |
306b0c95 | 321 | |
a1dd52af NG |
322 | user_mem = kmap_atomic(page, KM_USER0); |
323 | if (page_zero_filled(user_mem)) { | |
324 | kunmap_atomic(user_mem, KM_USER0); | |
325 | mutex_unlock(&rzs->lock); | |
326 | rzs_stat_inc(&rzs->stats.pages_zero); | |
327 | rzs_set_flag(rzs, index, RZS_ZERO); | |
328 | continue; | |
329 | } | |
306b0c95 | 330 | |
a1dd52af NG |
331 | ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen, |
332 | rzs->compress_workmem); | |
306b0c95 | 333 | |
a1dd52af | 334 | kunmap_atomic(user_mem, KM_USER0); |
306b0c95 | 335 | |
a1dd52af | 336 | if (unlikely(ret != LZO_E_OK)) { |
306b0c95 | 337 | mutex_unlock(&rzs->lock); |
a1dd52af | 338 | pr_err("Compression failed! err=%d\n", ret); |
6a907728 | 339 | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); |
306b0c95 NG |
340 | goto out; |
341 | } | |
342 | ||
a1dd52af NG |
343 | /* |
344 | * Page is incompressible. Store it as-is (uncompressed) | |
345 | * since we do not want to return too many swap write | |
346 | * errors which has side effect of hanging the system. | |
347 | */ | |
348 | if (unlikely(clen > max_zpage_size)) { | |
349 | clen = PAGE_SIZE; | |
350 | page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); | |
351 | if (unlikely(!page_store)) { | |
352 | mutex_unlock(&rzs->lock); | |
353 | pr_info("Error allocating memory for " | |
354 | "incompressible page: %u\n", index); | |
355 | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); | |
356 | goto out; | |
357 | } | |
358 | ||
359 | offset = 0; | |
360 | rzs_set_flag(rzs, index, RZS_UNCOMPRESSED); | |
361 | rzs_stat_inc(&rzs->stats.pages_expand); | |
362 | rzs->table[index].page = page_store; | |
363 | src = kmap_atomic(page, KM_USER0); | |
364 | goto memstore; | |
365 | } | |
306b0c95 | 366 | |
a1dd52af NG |
367 | if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader), |
368 | &rzs->table[index].page, &offset, | |
369 | GFP_NOIO | __GFP_HIGHMEM)) { | |
370 | mutex_unlock(&rzs->lock); | |
371 | pr_info("Error allocating memory for compressed " | |
372 | "page: %u, size=%zu\n", index, clen); | |
373 | rzs_stat64_inc(rzs, &rzs->stats.failed_writes); | |
374 | goto out; | |
375 | } | |
306b0c95 NG |
376 | |
377 | memstore: | |
a1dd52af | 378 | rzs->table[index].offset = offset; |
306b0c95 | 379 | |
a1dd52af NG |
380 | cmem = kmap_atomic(rzs->table[index].page, KM_USER1) + |
381 | rzs->table[index].offset; | |
306b0c95 NG |
382 | |
383 | #if 0 | |
a1dd52af NG |
384 | /* Back-reference needed for memory defragmentation */ |
385 | if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) { | |
386 | zheader = (struct zobj_header *)cmem; | |
387 | zheader->table_idx = index; | |
388 | cmem += sizeof(*zheader); | |
389 | } | |
306b0c95 NG |
390 | #endif |
391 | ||
a1dd52af | 392 | memcpy(cmem, src, clen); |
306b0c95 | 393 | |
a1dd52af NG |
394 | kunmap_atomic(cmem, KM_USER1); |
395 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) | |
396 | kunmap_atomic(src, KM_USER0); | |
306b0c95 | 397 | |
a1dd52af NG |
398 | /* Update stats */ |
399 | rzs->stats.compr_size += clen; | |
400 | rzs_stat_inc(&rzs->stats.pages_stored); | |
401 | if (clen <= PAGE_SIZE / 2) | |
402 | rzs_stat_inc(&rzs->stats.good_compress); | |
306b0c95 | 403 | |
a1dd52af NG |
404 | mutex_unlock(&rzs->lock); |
405 | index++; | |
406 | } | |
306b0c95 NG |
407 | |
408 | set_bit(BIO_UPTODATE, &bio->bi_flags); | |
409 | bio_endio(bio, 0); | |
410 | return 0; | |
411 | ||
412 | out: | |
306b0c95 NG |
413 | bio_io_error(bio); |
414 | return 0; | |
415 | } | |
416 | ||
306b0c95 NG |
417 | /* |
418 | * Check if request is within bounds and page aligned. | |
419 | */ | |
a1dd52af | 420 | static inline int valid_io_request(struct ramzswap *rzs, struct bio *bio) |
306b0c95 NG |
421 | { |
422 | if (unlikely( | |
423 | (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) || | |
424 | (bio->bi_sector & (SECTORS_PER_PAGE - 1)) || | |
a1dd52af | 425 | (bio->bi_size & (PAGE_SIZE - 1)))) { |
306b0c95 NG |
426 | |
427 | return 0; | |
428 | } | |
429 | ||
a1dd52af | 430 | /* I/O request is valid */ |
306b0c95 NG |
431 | return 1; |
432 | } | |
433 | ||
434 | /* | |
435 | * Handler function for all ramzswap I/O requests. | |
436 | */ | |
437 | static int ramzswap_make_request(struct request_queue *queue, struct bio *bio) | |
438 | { | |
439 | int ret = 0; | |
440 | struct ramzswap *rzs = queue->queuedata; | |
441 | ||
442 | if (unlikely(!rzs->init_done)) { | |
443 | bio_io_error(bio); | |
444 | return 0; | |
445 | } | |
446 | ||
a1dd52af | 447 | if (!valid_io_request(rzs, bio)) { |
6a907728 | 448 | rzs_stat64_inc(rzs, &rzs->stats.invalid_io); |
306b0c95 NG |
449 | bio_io_error(bio); |
450 | return 0; | |
451 | } | |
452 | ||
453 | switch (bio_data_dir(bio)) { | |
454 | case READ: | |
455 | ret = ramzswap_read(rzs, bio); | |
456 | break; | |
457 | ||
458 | case WRITE: | |
459 | ret = ramzswap_write(rzs, bio); | |
460 | break; | |
461 | } | |
462 | ||
463 | return ret; | |
464 | } | |
465 | ||
466 | static void reset_device(struct ramzswap *rzs) | |
467 | { | |
97a06382 | 468 | size_t index; |
306b0c95 | 469 | |
7eef7533 NG |
470 | /* Do not accept any new I/O request */ |
471 | rzs->init_done = 0; | |
472 | ||
306b0c95 NG |
473 | /* Free various per-device buffers */ |
474 | kfree(rzs->compress_workmem); | |
475 | free_pages((unsigned long)rzs->compress_buffer, 1); | |
476 | ||
477 | rzs->compress_workmem = NULL; | |
478 | rzs->compress_buffer = NULL; | |
479 | ||
480 | /* Free all pages that are still in this ramzswap device */ | |
97a06382 | 481 | for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) { |
306b0c95 NG |
482 | struct page *page; |
483 | u16 offset; | |
484 | ||
485 | page = rzs->table[index].page; | |
486 | offset = rzs->table[index].offset; | |
487 | ||
488 | if (!page) | |
489 | continue; | |
490 | ||
491 | if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) | |
492 | __free_page(page); | |
493 | else | |
494 | xv_free(rzs->mem_pool, page, offset); | |
495 | } | |
496 | ||
306b0c95 NG |
497 | vfree(rzs->table); |
498 | rzs->table = NULL; | |
499 | ||
500 | xv_destroy_pool(rzs->mem_pool); | |
501 | rzs->mem_pool = NULL; | |
502 | ||
306b0c95 NG |
503 | /* Reset stats */ |
504 | memset(&rzs->stats, 0, sizeof(rzs->stats)); | |
505 | ||
506 | rzs->disksize = 0; | |
306b0c95 NG |
507 | } |
508 | ||
509 | static int ramzswap_ioctl_init_device(struct ramzswap *rzs) | |
510 | { | |
511 | int ret; | |
512 | size_t num_pages; | |
306b0c95 NG |
513 | |
514 | if (rzs->init_done) { | |
515 | pr_info("Device already initialized!\n"); | |
516 | return -EBUSY; | |
517 | } | |
518 | ||
97a06382 | 519 | ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT); |
306b0c95 NG |
520 | |
521 | rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); | |
522 | if (!rzs->compress_workmem) { | |
523 | pr_err("Error allocating compressor working memory!\n"); | |
524 | ret = -ENOMEM; | |
525 | goto fail; | |
526 | } | |
527 | ||
528 | rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); | |
529 | if (!rzs->compress_buffer) { | |
530 | pr_err("Error allocating compressor buffer space\n"); | |
531 | ret = -ENOMEM; | |
532 | goto fail; | |
533 | } | |
534 | ||
535 | num_pages = rzs->disksize >> PAGE_SHIFT; | |
536 | rzs->table = vmalloc(num_pages * sizeof(*rzs->table)); | |
537 | if (!rzs->table) { | |
538 | pr_err("Error allocating ramzswap address table\n"); | |
539 | /* To prevent accessing table entries during cleanup */ | |
540 | rzs->disksize = 0; | |
541 | ret = -ENOMEM; | |
542 | goto fail; | |
543 | } | |
544 | memset(rzs->table, 0, num_pages * sizeof(*rzs->table)); | |
545 | ||
306b0c95 NG |
546 | set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT); |
547 | ||
97a06382 NG |
548 | /* ramzswap devices sort of resembles non-rotational disks */ |
549 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue); | |
306b0c95 NG |
550 | |
551 | rzs->mem_pool = xv_create_pool(); | |
552 | if (!rzs->mem_pool) { | |
553 | pr_err("Error creating memory pool\n"); | |
554 | ret = -ENOMEM; | |
555 | goto fail; | |
556 | } | |
557 | ||
306b0c95 NG |
558 | rzs->init_done = 1; |
559 | ||
560 | pr_debug("Initialization done!\n"); | |
561 | return 0; | |
562 | ||
563 | fail: | |
564 | reset_device(rzs); | |
565 | ||
566 | pr_err("Initialization failed: err=%d\n", ret); | |
567 | return ret; | |
568 | } | |
569 | ||
570 | static int ramzswap_ioctl_reset_device(struct ramzswap *rzs) | |
571 | { | |
572 | if (rzs->init_done) | |
573 | reset_device(rzs); | |
574 | ||
575 | return 0; | |
576 | } | |
577 | ||
578 | static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode, | |
579 | unsigned int cmd, unsigned long arg) | |
580 | { | |
581 | int ret = 0; | |
97a06382 | 582 | size_t disksize_kb; |
306b0c95 NG |
583 | |
584 | struct ramzswap *rzs = bdev->bd_disk->private_data; | |
585 | ||
586 | switch (cmd) { | |
587 | case RZSIO_SET_DISKSIZE_KB: | |
588 | if (rzs->init_done) { | |
589 | ret = -EBUSY; | |
590 | goto out; | |
591 | } | |
592 | if (copy_from_user(&disksize_kb, (void *)arg, | |
593 | _IOC_SIZE(cmd))) { | |
594 | ret = -EFAULT; | |
595 | goto out; | |
596 | } | |
597 | rzs->disksize = disksize_kb << 10; | |
598 | pr_info("Disk size set to %zu kB\n", disksize_kb); | |
599 | break; | |
600 | ||
306b0c95 NG |
601 | case RZSIO_GET_STATS: |
602 | { | |
603 | struct ramzswap_ioctl_stats *stats; | |
604 | if (!rzs->init_done) { | |
605 | ret = -ENOTTY; | |
606 | goto out; | |
607 | } | |
608 | stats = kzalloc(sizeof(*stats), GFP_KERNEL); | |
609 | if (!stats) { | |
610 | ret = -ENOMEM; | |
611 | goto out; | |
612 | } | |
613 | ramzswap_ioctl_get_stats(rzs, stats); | |
614 | if (copy_to_user((void *)arg, stats, sizeof(*stats))) { | |
615 | kfree(stats); | |
616 | ret = -EFAULT; | |
617 | goto out; | |
618 | } | |
619 | kfree(stats); | |
620 | break; | |
621 | } | |
622 | case RZSIO_INIT: | |
623 | ret = ramzswap_ioctl_init_device(rzs); | |
624 | break; | |
625 | ||
626 | case RZSIO_RESET: | |
627 | /* Do not reset an active device! */ | |
628 | if (bdev->bd_holders) { | |
629 | ret = -EBUSY; | |
630 | goto out; | |
631 | } | |
7eef7533 NG |
632 | |
633 | /* Make sure all pending I/O is finished */ | |
634 | if (bdev) | |
635 | fsync_bdev(bdev); | |
636 | ||
306b0c95 NG |
637 | ret = ramzswap_ioctl_reset_device(rzs); |
638 | break; | |
639 | ||
640 | default: | |
641 | pr_info("Invalid ioctl %u\n", cmd); | |
642 | ret = -ENOTTY; | |
643 | } | |
644 | ||
645 | out: | |
646 | return ret; | |
647 | } | |
648 | ||
107c161b NG |
649 | void ramzswap_slot_free_notify(struct block_device *bdev, unsigned long index) |
650 | { | |
651 | struct ramzswap *rzs; | |
652 | ||
653 | rzs = bdev->bd_disk->private_data; | |
654 | ramzswap_free_page(rzs, index); | |
655 | rzs_stat64_inc(rzs, &rzs->stats.notify_free); | |
107c161b NG |
656 | } |
657 | ||
a1dd52af | 658 | static const struct block_device_operations ramzswap_devops = { |
306b0c95 | 659 | .ioctl = ramzswap_ioctl, |
107c161b NG |
660 | .swap_slot_free_notify = ramzswap_slot_free_notify, |
661 | .owner = THIS_MODULE | |
306b0c95 NG |
662 | }; |
663 | ||
3bf040c7 | 664 | static int create_device(struct ramzswap *rzs, int device_id) |
306b0c95 | 665 | { |
de1a21a0 NG |
666 | int ret = 0; |
667 | ||
306b0c95 | 668 | mutex_init(&rzs->lock); |
6a907728 | 669 | spin_lock_init(&rzs->stat64_lock); |
306b0c95 NG |
670 | |
671 | rzs->queue = blk_alloc_queue(GFP_KERNEL); | |
672 | if (!rzs->queue) { | |
673 | pr_err("Error allocating disk queue for device %d\n", | |
674 | device_id); | |
de1a21a0 NG |
675 | ret = -ENOMEM; |
676 | goto out; | |
306b0c95 NG |
677 | } |
678 | ||
679 | blk_queue_make_request(rzs->queue, ramzswap_make_request); | |
680 | rzs->queue->queuedata = rzs; | |
681 | ||
682 | /* gendisk structure */ | |
683 | rzs->disk = alloc_disk(1); | |
684 | if (!rzs->disk) { | |
685 | blk_cleanup_queue(rzs->queue); | |
686 | pr_warning("Error allocating disk structure for device %d\n", | |
687 | device_id); | |
de1a21a0 NG |
688 | ret = -ENOMEM; |
689 | goto out; | |
306b0c95 NG |
690 | } |
691 | ||
692 | rzs->disk->major = ramzswap_major; | |
693 | rzs->disk->first_minor = device_id; | |
694 | rzs->disk->fops = &ramzswap_devops; | |
695 | rzs->disk->queue = rzs->queue; | |
696 | rzs->disk->private_data = rzs; | |
697 | snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id); | |
698 | ||
97a06382 | 699 | /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */ |
306b0c95 | 700 | set_capacity(rzs->disk, 0); |
5d83d5a0 | 701 | |
a1dd52af NG |
702 | /* |
703 | * To ensure that we always get PAGE_SIZE aligned | |
704 | * and n*PAGE_SIZED sized I/O requests. | |
705 | */ | |
5d83d5a0 NG |
706 | blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE); |
707 | blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE); | |
a1dd52af NG |
708 | blk_queue_io_min(rzs->disk->queue, PAGE_SIZE); |
709 | blk_queue_io_opt(rzs->disk->queue, PAGE_SIZE); | |
5d83d5a0 | 710 | |
306b0c95 NG |
711 | add_disk(rzs->disk); |
712 | ||
713 | rzs->init_done = 0; | |
de1a21a0 NG |
714 | |
715 | out: | |
716 | return ret; | |
306b0c95 NG |
717 | } |
718 | ||
719 | static void destroy_device(struct ramzswap *rzs) | |
720 | { | |
721 | if (rzs->disk) { | |
722 | del_gendisk(rzs->disk); | |
723 | put_disk(rzs->disk); | |
724 | } | |
725 | ||
726 | if (rzs->queue) | |
727 | blk_cleanup_queue(rzs->queue); | |
728 | } | |
729 | ||
730 | static int __init ramzswap_init(void) | |
731 | { | |
de1a21a0 | 732 | int ret, dev_id; |
306b0c95 NG |
733 | |
734 | if (num_devices > max_num_devices) { | |
735 | pr_warning("Invalid value for num_devices: %u\n", | |
736 | num_devices); | |
de1a21a0 NG |
737 | ret = -EINVAL; |
738 | goto out; | |
306b0c95 NG |
739 | } |
740 | ||
741 | ramzswap_major = register_blkdev(0, "ramzswap"); | |
742 | if (ramzswap_major <= 0) { | |
743 | pr_warning("Unable to get major number\n"); | |
de1a21a0 NG |
744 | ret = -EBUSY; |
745 | goto out; | |
306b0c95 NG |
746 | } |
747 | ||
748 | if (!num_devices) { | |
749 | pr_info("num_devices not specified. Using default: 1\n"); | |
750 | num_devices = 1; | |
751 | } | |
752 | ||
753 | /* Allocate the device array and initialize each one */ | |
754 | pr_info("Creating %u devices ...\n", num_devices); | |
755 | devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL); | |
de1a21a0 NG |
756 | if (!devices) { |
757 | ret = -ENOMEM; | |
758 | goto unregister; | |
759 | } | |
306b0c95 | 760 | |
de1a21a0 NG |
761 | for (dev_id = 0; dev_id < num_devices; dev_id++) { |
762 | ret = create_device(&devices[dev_id], dev_id); | |
763 | if (ret) | |
3bf040c7 | 764 | goto free_devices; |
de1a21a0 NG |
765 | } |
766 | ||
306b0c95 | 767 | return 0; |
de1a21a0 | 768 | |
3bf040c7 | 769 | free_devices: |
de1a21a0 NG |
770 | while (dev_id) |
771 | destroy_device(&devices[--dev_id]); | |
772 | unregister: | |
306b0c95 | 773 | unregister_blkdev(ramzswap_major, "ramzswap"); |
de1a21a0 | 774 | out: |
306b0c95 NG |
775 | return ret; |
776 | } | |
777 | ||
778 | static void __exit ramzswap_exit(void) | |
779 | { | |
780 | int i; | |
781 | struct ramzswap *rzs; | |
782 | ||
783 | for (i = 0; i < num_devices; i++) { | |
784 | rzs = &devices[i]; | |
785 | ||
786 | destroy_device(rzs); | |
787 | if (rzs->init_done) | |
788 | reset_device(rzs); | |
789 | } | |
790 | ||
791 | unregister_blkdev(ramzswap_major, "ramzswap"); | |
792 | ||
793 | kfree(devices); | |
794 | pr_debug("Cleanup done!\n"); | |
795 | } | |
796 | ||
797 | module_param(num_devices, uint, 0); | |
798 | MODULE_PARM_DESC(num_devices, "Number of ramzswap devices"); | |
799 | ||
800 | module_init(ramzswap_init); | |
801 | module_exit(ramzswap_exit); | |
802 | ||
803 | MODULE_LICENSE("Dual BSD/GPL"); | |
804 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); | |
805 | MODULE_DESCRIPTION("Compressed RAM Based Swap Device"); |