1 | /* |
2 | * zcache.c | |
3 | * | |
4 | * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp. | |
5 | * Copyright (c) 2010,2011, Nitin Gupta | |
6 | * | |
7 | * Zcache provides an in-kernel "host implementation" for transcendent memory | |
8 | * ("tmem") and, thus indirectly, for cleancache and frontswap. Zcache uses | |
9 | * lzo1x compression to improve density and an embedded allocator called | |
10 | * "zbud" which "buddies" two compressed pages semi-optimally in each physical | |
11 | * pageframe. Zbud is integrally tied into tmem to allow pageframes to | |
12 | * be "reclaimed" efficiently. | |
13 | */ | |
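/*
 * Illustrative example (numbers are hypothetical, not taken from the code
 * below): with 4KB pageframes, a zpage that compressed to ~1000 bytes can
 * be "buddied" with one that compressed to ~2900 bytes in a single
 * pageframe, since together (plus zbud bookkeeping) they still fit in
 * 4096 bytes.
 */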
14 | ||
15 | #include <linux/module.h> | |
16 | #include <linux/cpu.h> | |
17 | #include <linux/highmem.h> | |
18 | #include <linux/list.h> | |
19 | #include <linux/slab.h> | |
20 | #include <linux/spinlock.h> | |
21 | #include <linux/types.h> | |
22 | #include <linux/atomic.h> | |
23 | #include <linux/math64.h> | |
24 | #include <linux/crypto.h> | |
25 | ||
26 | #include <linux/cleancache.h> | |
27 | #include <linux/frontswap.h> | |
28 | #include "tmem.h" | |
29 | #include "zcache.h" | |
30 | #include "zbud.h" | |
31 | #include "ramster.h" | |
32 | #ifdef CONFIG_RAMSTER | |
33 | static int ramster_enabled; | |
34 | #else | |
35 | #define ramster_enabled 0 | |
36 | #endif | |
37 | ||
38 | #ifndef __PG_WAS_ACTIVE | |
39 | static inline bool PageWasActive(struct page *page) | |
40 | { | |
41 | return true; | |
42 | } | |
43 | ||
44 | static inline void SetPageWasActive(struct page *page) | |
45 | { | |
46 | } | |
47 | #endif | |
48 | ||
49 | #ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS | |
50 | static bool frontswap_has_exclusive_gets __read_mostly = true; | |
51 | #else | |
52 | static bool frontswap_has_exclusive_gets __read_mostly; | |
53 | static inline void frontswap_tmem_exclusive_gets(bool b) | |
54 | { | |
55 | } | |
56 | #endif | |
57 | ||
58 | static int zcache_enabled __read_mostly; | |
59 | static int disable_cleancache __read_mostly; | |
60 | static int disable_frontswap __read_mostly; | |
61 | static int disable_frontswap_ignore_nonactive __read_mostly; | |
62 | static int disable_cleancache_ignore_nonactive __read_mostly; | |
63 | static char *namestr __read_mostly = "zcache"; | |
64 | ||
65 | #define ZCACHE_GFP_MASK \ | |
66 | (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) | |
67 | ||
68 | MODULE_LICENSE("GPL"); | |
69 | ||
70 | /* crypto API for zcache */ | |
71 | #define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME | |
72 | static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly; | |
73 | static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly; | |
74 | ||
75 | enum comp_op { | |
76 | ZCACHE_COMPOP_COMPRESS, | |
77 | ZCACHE_COMPOP_DECOMPRESS | |
78 | }; | |
79 | ||
80 | static inline int zcache_comp_op(enum comp_op op, | |
81 | const u8 *src, unsigned int slen, | |
82 | u8 *dst, unsigned int *dlen) | |
83 | { | |
84 | struct crypto_comp *tfm; | |
85 | int ret = -1; | |
86 | ||
87 | BUG_ON(!zcache_comp_pcpu_tfms); | |
88 | tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu()); | |
89 | BUG_ON(!tfm); | |
90 | switch (op) { | |
91 | case ZCACHE_COMPOP_COMPRESS: | |
92 | ret = crypto_comp_compress(tfm, src, slen, dst, dlen); | |
93 | break; | |
94 | case ZCACHE_COMPOP_DECOMPRESS: | |
95 | ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); | |
96 | break; | |
97 | default: | |
98 | ret = -EINVAL; | |
99 | } | |
100 | put_cpu(); | |
101 | return ret; | |
102 | } | |
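/*
 * Minimal usage sketch for the helper above (illustrative only; "src" and
 * "dst" are hypothetical buffers, with dst sized to two pages as
 * zcache_compress() does below):
 *
 *	unsigned int dlen = 2 * PAGE_SIZE;
 *	int err = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE,
 *				 dst, &dlen);
 *
 * On success dlen holds the compressed length; the per-cpu tfm is
 * selected and released internally via get_cpu()/put_cpu().
 */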
103 | ||
104 | /* | |
105 | * policy parameters | |
106 | */ | |
107 | ||
108 | /* | |
109 | * byte count defining poor compression; pages with greater zsize will be | |
110 | * rejected | |
111 | */ | |
112 | static unsigned int zbud_max_zsize __read_mostly = (PAGE_SIZE / 8) * 7; | |
113 | /* | |
114 | * byte count defining poor *mean* compression; pages with greater zsize | |
115 | * will be rejected until sufficient better-compressed pages are accepted | |
116 | * driving the mean below this threshold | |
117 | */ | |
118 | static unsigned int zbud_max_mean_zsize __read_mostly = (PAGE_SIZE / 8) * 5; | |
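/*
 * For example, with 4KB pages: zbud_max_zsize == 3584, so a page whose
 * compressed size exceeds 7/8 of a page is rejected outright, and
 * zbud_max_mean_zsize == 2560, so a page compressing to more than 5/8 of
 * a page is also rejected whenever the running mean compressed size of
 * already-accepted pages exceeds 5/8 of a page.
 */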
119 | ||
120 | /* | |
121 | * for now, we use named slabs so we can easily track usage; later we can | |
122 | * either just use kmalloc, or perhaps add a slab-like allocator | |
123 | * to more carefully manage total memory utilization | |
124 | */ | |
125 | static struct kmem_cache *zcache_objnode_cache; | |
126 | static struct kmem_cache *zcache_obj_cache; | |
127 | ||
128 | static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, }; | |
129 | ||
130 | /* we try to keep these statistics SMP-consistent */ | |
131 | static long zcache_obj_count; | |
132 | static atomic_t zcache_obj_atomic = ATOMIC_INIT(0); | |
133 | static long zcache_obj_count_max; | |
134 | static long zcache_objnode_count; | |
135 | static atomic_t zcache_objnode_atomic = ATOMIC_INIT(0); | |
136 | static long zcache_objnode_count_max; | |
137 | static u64 zcache_eph_zbytes; | |
138 | static atomic_long_t zcache_eph_zbytes_atomic = ATOMIC_INIT(0); | |
139 | static u64 zcache_eph_zbytes_max; | |
140 | static u64 zcache_pers_zbytes; | |
141 | static atomic_long_t zcache_pers_zbytes_atomic = ATOMIC_INIT(0); | |
142 | static u64 zcache_pers_zbytes_max; | |
143 | static long zcache_eph_pageframes; | |
144 | static atomic_t zcache_eph_pageframes_atomic = ATOMIC_INIT(0); | |
145 | static long zcache_eph_pageframes_max; | |
146 | static long zcache_pers_pageframes; | |
147 | static atomic_t zcache_pers_pageframes_atomic = ATOMIC_INIT(0); | |
148 | static long zcache_pers_pageframes_max; | |
149 | static long zcache_pageframes_alloced; | |
150 | static atomic_t zcache_pageframes_alloced_atomic = ATOMIC_INIT(0); | |
151 | static long zcache_pageframes_freed; | |
152 | static atomic_t zcache_pageframes_freed_atomic = ATOMIC_INIT(0); | |
153 | static long zcache_eph_zpages; | |
154 | static atomic_t zcache_eph_zpages_atomic = ATOMIC_INIT(0); | |
155 | static long zcache_eph_zpages_max; | |
156 | static long zcache_pers_zpages; | |
157 | static atomic_t zcache_pers_zpages_atomic = ATOMIC_INIT(0); | |
158 | static long zcache_pers_zpages_max; | |
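/*
 * The update pattern used throughout for each paired counter above is
 * roughly (with "foo" standing in for any of the names):
 *
 *	zcache_foo = atomic_inc_return(&zcache_foo_atomic);
 *	if (zcache_foo > zcache_foo_max)
 *		zcache_foo_max = zcache_foo;
 *
 * i.e. the atomic_t carries the live count while the plain variable is a
 * snapshot exposed via debugfs; the "max" update is racy, but only an
 * approximate highwater mark is needed.
 */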
159 | ||
160 | /* but for the rest of these, counting races are ok */ | |
161 | static unsigned long zcache_flush_total; | |
162 | static unsigned long zcache_flush_found; | |
163 | static unsigned long zcache_flobj_total; | |
164 | static unsigned long zcache_flobj_found; | |
165 | static unsigned long zcache_failed_eph_puts; | |
166 | static unsigned long zcache_failed_pers_puts; | |
167 | static unsigned long zcache_failed_getfreepages; | |
168 | static unsigned long zcache_failed_alloc; | |
169 | static unsigned long zcache_put_to_flush; | |
170 | static unsigned long zcache_compress_poor; | |
171 | static unsigned long zcache_mean_compress_poor; | |
172 | static unsigned long zcache_eph_ate_tail; | |
173 | static unsigned long zcache_eph_ate_tail_failed; | |
174 | static unsigned long zcache_pers_ate_eph; | |
175 | static unsigned long zcache_pers_ate_eph_failed; | |
176 | static unsigned long zcache_evicted_eph_zpages; | |
177 | static unsigned long zcache_evicted_eph_pageframes; | |
178 | static unsigned long zcache_last_active_file_pageframes; | |
179 | static unsigned long zcache_last_inactive_file_pageframes; | |
180 | static unsigned long zcache_last_active_anon_pageframes; | |
181 | static unsigned long zcache_last_inactive_anon_pageframes; | |
182 | static unsigned long zcache_eph_nonactive_puts_ignored; | |
183 | static unsigned long zcache_pers_nonactive_puts_ignored; | |
184 | ||
185 | #ifdef CONFIG_DEBUG_FS | |
186 | #include <linux/debugfs.h> | |
187 | #define zdfs debugfs_create_size_t | |
188 | #define zdfs64 debugfs_create_u64 | |
189 | static int zcache_debugfs_init(void) | |
190 | { | |
191 | struct dentry *root = debugfs_create_dir("zcache", NULL); | |
192 | if (root == NULL) | |
193 | return -ENXIO; | |
194 | ||
195 | zdfs("obj_count", S_IRUGO, root, &zcache_obj_count); | |
196 | zdfs("obj_count_max", S_IRUGO, root, &zcache_obj_count_max); | |
197 | zdfs("objnode_count", S_IRUGO, root, &zcache_objnode_count); | |
198 | zdfs("objnode_count_max", S_IRUGO, root, &zcache_objnode_count_max); | |
199 | zdfs("flush_total", S_IRUGO, root, &zcache_flush_total); | |
200 | zdfs("flush_found", S_IRUGO, root, &zcache_flush_found); | |
201 | zdfs("flobj_total", S_IRUGO, root, &zcache_flobj_total); | |
202 | zdfs("flobj_found", S_IRUGO, root, &zcache_flobj_found); | |
203 | zdfs("failed_eph_puts", S_IRUGO, root, &zcache_failed_eph_puts); | |
204 | zdfs("failed_pers_puts", S_IRUGO, root, &zcache_failed_pers_puts); | |
205 | zdfs("failed_get_free_pages", S_IRUGO, root, | |
206 | &zcache_failed_getfreepages); | |
207 | zdfs("failed_alloc", S_IRUGO, root, &zcache_failed_alloc); | |
208 | zdfs("put_to_flush", S_IRUGO, root, &zcache_put_to_flush); | |
209 | zdfs("compress_poor", S_IRUGO, root, &zcache_compress_poor); | |
210 | zdfs("mean_compress_poor", S_IRUGO, root, &zcache_mean_compress_poor); | |
211 | zdfs("eph_ate_tail", S_IRUGO, root, &zcache_eph_ate_tail); | |
212 | zdfs("eph_ate_tail_failed", S_IRUGO, root, &zcache_eph_ate_tail_failed); | |
213 | zdfs("pers_ate_eph", S_IRUGO, root, &zcache_pers_ate_eph); | |
214 | zdfs("pers_ate_eph_failed", S_IRUGO, root, &zcache_pers_ate_eph_failed); | |
215 | zdfs("evicted_eph_zpages", S_IRUGO, root, &zcache_evicted_eph_zpages); | |
216 | zdfs("evicted_eph_pageframes", S_IRUGO, root, | |
217 | &zcache_evicted_eph_pageframes); | |
218 | zdfs("eph_pageframes", S_IRUGO, root, &zcache_eph_pageframes); | |
219 | zdfs("eph_pageframes_max", S_IRUGO, root, &zcache_eph_pageframes_max); | |
220 | zdfs("pers_pageframes", S_IRUGO, root, &zcache_pers_pageframes); | |
221 | zdfs("pers_pageframes_max", S_IRUGO, root, &zcache_pers_pageframes_max); | |
222 | zdfs("eph_zpages", S_IRUGO, root, &zcache_eph_zpages); | |
223 | zdfs("eph_zpages_max", S_IRUGO, root, &zcache_eph_zpages_max); | |
224 | zdfs("pers_zpages", S_IRUGO, root, &zcache_pers_zpages); | |
225 | zdfs("pers_zpages_max", S_IRUGO, root, &zcache_pers_zpages_max); | |
226 | zdfs("last_active_file_pageframes", S_IRUGO, root, | |
227 | &zcache_last_active_file_pageframes); | |
228 | zdfs("last_inactive_file_pageframes", S_IRUGO, root, | |
229 | &zcache_last_inactive_file_pageframes); | |
230 | zdfs("last_active_anon_pageframes", S_IRUGO, root, | |
231 | &zcache_last_active_anon_pageframes); | |
232 | zdfs("last_inactive_anon_pageframes", S_IRUGO, root, | |
233 | &zcache_last_inactive_anon_pageframes); | |
234 | zdfs("eph_nonactive_puts_ignored", S_IRUGO, root, | |
235 | &zcache_eph_nonactive_puts_ignored); | |
236 | zdfs("pers_nonactive_puts_ignored", S_IRUGO, root, | |
237 | &zcache_pers_nonactive_puts_ignored); | |
238 | zdfs64("eph_zbytes", S_IRUGO, root, &zcache_eph_zbytes); | |
239 | zdfs64("eph_zbytes_max", S_IRUGO, root, &zcache_eph_zbytes_max); | |
240 | zdfs64("pers_zbytes", S_IRUGO, root, &zcache_pers_zbytes); | |
241 | zdfs64("pers_zbytes_max", S_IRUGO, root, &zcache_pers_zbytes_max); | |
242 | return 0; | |
243 | } | |
244 | #undef zdfs | |
245 | #undef zdfs64 | |
246 | #endif | |
247 | ||
248 | #define ZCACHE_DEBUG | |
249 | #ifdef ZCACHE_DEBUG | |
250 | /* developers can call this in case of ooms, e.g. to find memory leaks */ | |
251 | void zcache_dump(void) | |
252 | { | |
253 | pr_info("zcache: obj_count=%lu\n", zcache_obj_count); | |
254 | pr_info("zcache: obj_count_max=%lu\n", zcache_obj_count_max); | |
255 | pr_info("zcache: objnode_count=%lu\n", zcache_objnode_count); | |
256 | pr_info("zcache: objnode_count_max=%lu\n", zcache_objnode_count_max); | |
257 | pr_info("zcache: flush_total=%lu\n", zcache_flush_total); | |
258 | pr_info("zcache: flush_found=%lu\n", zcache_flush_found); | |
259 | pr_info("zcache: flobj_total=%lu\n", zcache_flobj_total); | |
260 | pr_info("zcache: flobj_found=%lu\n", zcache_flobj_found); | |
261 | pr_info("zcache: failed_eph_puts=%lu\n", zcache_failed_eph_puts); | |
262 | pr_info("zcache: failed_pers_puts=%lu\n", zcache_failed_pers_puts); | |
263 | pr_info("zcache: failed_get_free_pages=%lu\n", | |
264 | zcache_failed_getfreepages); | |
265 | pr_info("zcache: failed_alloc=%lu\n", zcache_failed_alloc); | |
266 | pr_info("zcache: put_to_flush=%lu\n", zcache_put_to_flush); | |
267 | pr_info("zcache: compress_poor=%lu\n", zcache_compress_poor); | |
268 | pr_info("zcache: mean_compress_poor=%lu\n", | |
269 | zcache_mean_compress_poor); | |
270 | pr_info("zcache: eph_ate_tail=%lu\n", zcache_eph_ate_tail); | |
271 | pr_info("zcache: eph_ate_tail_failed=%lu\n", | |
272 | zcache_eph_ate_tail_failed); | |
273 | pr_info("zcache: pers_ate_eph=%lu\n", zcache_pers_ate_eph); | |
274 | pr_info("zcache: pers_ate_eph_failed=%lu\n", | |
275 | zcache_pers_ate_eph_failed); | |
276 | pr_info("zcache: evicted_eph_zpages=%lu\n", zcache_evicted_eph_zpages); | |
277 | pr_info("zcache: evicted_eph_pageframes=%lu\n", | |
278 | zcache_evicted_eph_pageframes); | |
279 | pr_info("zcache: eph_pageframes=%lu\n", zcache_eph_pageframes); | |
280 | pr_info("zcache: eph_pageframes_max=%lu\n", zcache_eph_pageframes_max); | |
281 | pr_info("zcache: pers_pageframes=%lu\n", zcache_pers_pageframes); | |
282 | pr_info("zcache: pers_pageframes_max=%lu\n", | |
283 | zcache_pers_pageframes_max); | |
284 | pr_info("zcache: eph_zpages=%lu\n", zcache_eph_zpages); | |
285 | pr_info("zcache: eph_zpages_max=%lu\n", zcache_eph_zpages_max); | |
286 | pr_info("zcache: pers_zpages=%lu\n", zcache_pers_zpages); | |
287 | pr_info("zcache: pers_zpages_max=%lu\n", zcache_pers_zpages_max); | |
288 | pr_info("zcache: eph_zbytes=%llu\n", | |
289 | (unsigned long long)zcache_eph_zbytes); | |
290 | pr_info("zcache: eph_zbytes_max=%llu\n", | |
291 | (unsigned long long)zcache_eph_zbytes_max); | |
292 | pr_info("zcache: pers_zbytes=%llu\n", | |
293 | (unsigned long long)zcache_pers_zbytes); | |
294 | pr_info("zcache: pers_zbytes_max=%llu\n", | |
295 | (unsigned long long)zcache_pers_zbytes_max); | |
296 | } | |
297 | #endif | |
298 | ||
299 | /* | |
300 | * zcache core code starts here | |
301 | */ | |
302 | ||
303 | static struct zcache_client zcache_host; | |
304 | static struct zcache_client zcache_clients[MAX_CLIENTS]; | |
305 | ||
306 | static inline bool is_local_client(struct zcache_client *cli) | |
307 | { | |
308 | return cli == &zcache_host; | |
309 | } | |
310 | ||
311 | static struct zcache_client *zcache_get_client_by_id(uint16_t cli_id) | |
312 | { | |
313 | struct zcache_client *cli = &zcache_host; | |
314 | ||
315 | if (cli_id != LOCAL_CLIENT) { | |
316 | if (cli_id >= MAX_CLIENTS) | |
317 | goto out; | |
318 | cli = &zcache_clients[cli_id]; | |
319 | } | |
320 | out: | |
321 | return cli; | |
322 | } | |
323 | ||
324 | /* | |
325 | * Tmem operations assume the poolid implies the invoking client. | |
326 | * Zcache only has one client (the kernel itself): LOCAL_CLIENT. | |
327 | * RAMster has each client numbered by cluster node, and a KVM version | |
328 | * of zcache would have one client per guest and each client might | |
329 | * have a poolid==N. | |
330 | */ | |
331 | struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid) | |
332 | { | |
333 | struct tmem_pool *pool = NULL; | |
334 | struct zcache_client *cli = NULL; | |
335 | ||
336 | cli = zcache_get_client_by_id(cli_id); | |
337 | if (cli == NULL) | |
338 | goto out; | |
339 | if (!is_local_client(cli)) | |
340 | atomic_inc(&cli->refcount); | |
341 | if (poolid < MAX_POOLS_PER_CLIENT) { | |
342 | pool = cli->tmem_pools[poolid]; | |
343 | if (pool != NULL) | |
344 | atomic_inc(&pool->refcount); | |
345 | } | |
346 | out: | |
347 | return pool; | |
348 | } | |
349 | ||
350 | void zcache_put_pool(struct tmem_pool *pool) | |
351 | { | |
352 | struct zcache_client *cli = NULL; | |
353 | ||
354 | if (pool == NULL) | |
355 | BUG(); | |
356 | cli = pool->client; | |
357 | atomic_dec(&pool->refcount); | |
358 | if (!is_local_client(cli)) | |
359 | atomic_dec(&cli->refcount); | |
360 | } | |
361 | ||
362 | int zcache_new_client(uint16_t cli_id) | |
363 | { | |
364 | struct zcache_client *cli; | |
365 | int ret = -1; | |
366 | ||
367 | cli = zcache_get_client_by_id(cli_id); | |
368 | if (cli == NULL) | |
369 | goto out; | |
370 | if (cli->allocated) | |
371 | goto out; | |
372 | cli->allocated = 1; | |
373 | ret = 0; | |
374 | out: | |
375 | return ret; | |
376 | } | |
377 | ||
378 | /* | |
379 | * zcache implementation for tmem host ops | |
380 | */ | |
381 | ||
382 | static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool) | |
383 | { | |
384 | struct tmem_objnode *objnode = NULL; | |
385 | struct zcache_preload *kp; | |
386 | int i; | |
387 | ||
388 | kp = &__get_cpu_var(zcache_preloads); | |
389 | for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) { | |
390 | objnode = kp->objnodes[i]; | |
391 | if (objnode != NULL) { | |
392 | kp->objnodes[i] = NULL; | |
393 | break; | |
394 | } | |
395 | } | |
396 | BUG_ON(objnode == NULL); | |
397 | zcache_objnode_count = atomic_inc_return(&zcache_objnode_atomic); | |
398 | if (zcache_objnode_count > zcache_objnode_count_max) | |
399 | zcache_objnode_count_max = zcache_objnode_count; | |
400 | return objnode; | |
401 | } | |
402 | ||
403 | static void zcache_objnode_free(struct tmem_objnode *objnode, | |
404 | struct tmem_pool *pool) | |
405 | { | |
406 | zcache_objnode_count = | |
407 | atomic_dec_return(&zcache_objnode_atomic); | |
408 | BUG_ON(zcache_objnode_count < 0); | |
409 | kmem_cache_free(zcache_objnode_cache, objnode); | |
410 | } | |
411 | ||
412 | static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool) | |
413 | { | |
414 | struct tmem_obj *obj = NULL; | |
415 | struct zcache_preload *kp; | |
416 | ||
417 | kp = &__get_cpu_var(zcache_preloads); | |
418 | obj = kp->obj; | |
419 | BUG_ON(obj == NULL); | |
420 | kp->obj = NULL; | |
421 | zcache_obj_count = atomic_inc_return(&zcache_obj_atomic); | |
422 | if (zcache_obj_count > zcache_obj_count_max) | |
423 | zcache_obj_count_max = zcache_obj_count; | |
424 | return obj; | |
425 | } | |
426 | ||
427 | static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool) | |
428 | { | |
429 | zcache_obj_count = | |
430 | atomic_dec_return(&zcache_obj_atomic); | |
431 | BUG_ON(zcache_obj_count < 0); | |
432 | kmem_cache_free(zcache_obj_cache, obj); | |
433 | } | |
434 | ||
435 | static struct tmem_hostops zcache_hostops = { | |
436 | .obj_alloc = zcache_obj_alloc, | |
437 | .obj_free = zcache_obj_free, | |
438 | .objnode_alloc = zcache_objnode_alloc, | |
439 | .objnode_free = zcache_objnode_free, | |
440 | }; | |
441 | ||
442 | static struct page *zcache_alloc_page(void) | |
443 | { | |
444 | struct page *page = alloc_page(ZCACHE_GFP_MASK); | |
445 | ||
446 | if (page != NULL) | |
447 | zcache_pageframes_alloced = | |
448 | atomic_inc_return(&zcache_pageframes_alloced_atomic); | |
449 | return page; | |
450 | } | |
451 | ||
451 | ||
452 | #ifdef FRONTSWAP_HAS_UNUSE | |
453 | static void zcache_unacct_page(void) |
454 | { | |
455 | zcache_pageframes_freed = | |
456 | atomic_inc_return(&zcache_pageframes_freed_atomic); | |
457 | } | |
458 | #endif | |
459 | |
460 | static void zcache_free_page(struct page *page) | |
461 | { | |
462 | long curr_pageframes; | |
463 | static long max_pageframes, min_pageframes; | |
464 | |
465 | if (page == NULL) | |
466 | BUG(); | |
467 | __free_page(page); | |
468 | zcache_pageframes_freed = | |
469 | atomic_inc_return(&zcache_pageframes_freed_atomic); | |
470 | curr_pageframes = zcache_pageframes_alloced - | |
471 | atomic_read(&zcache_pageframes_freed_atomic) - | |
472 | atomic_read(&zcache_eph_pageframes_atomic) - | |
473 | atomic_read(&zcache_pers_pageframes_atomic); | |
474 | if (curr_pageframes > max_pageframes) | |
475 | max_pageframes = curr_pageframes; | |
476 | if (curr_pageframes < min_pageframes) | |
477 | min_pageframes = curr_pageframes; | |
478 | #ifdef ZCACHE_DEBUG | |
479 | if (curr_pageframes > 2L || curr_pageframes < -2L) { | |
480 | /* pr_info here */ | |
481 | } | |
482 | #endif | |
483 | } | |
484 | ||
485 | /* | |
486 | * zcache implementations for PAM page descriptor ops | |
487 | */ | |
488 | ||
489 | /* forward reference */ | |
490 | static void zcache_compress(struct page *from, | |
491 | void **out_va, unsigned *out_len); | |
492 | ||
493 | static struct page *zcache_evict_eph_pageframe(void); | |
494 | ||
495 | static void *zcache_pampd_eph_create(char *data, size_t size, bool raw, | |
496 | struct tmem_handle *th) | |
497 | { | |
498 | void *pampd = NULL, *cdata = data; | |
499 | unsigned clen = size; | |
500 | struct page *page = (struct page *)(data), *newpage; | |
501 | ||
502 | if (!raw) { | |
503 | zcache_compress(page, &cdata, &clen); | |
504 | if (clen > zbud_max_buddy_size()) { | |
505 | zcache_compress_poor++; | |
506 | goto out; | |
507 | } | |
508 | } else { | |
509 | BUG_ON(clen > zbud_max_buddy_size()); | |
510 | } | |
511 | ||
512 | /* look for space via an existing match first */ | |
513 | pampd = (void *)zbud_match_prep(th, true, cdata, clen); | |
514 | if (pampd != NULL) | |
515 | goto got_pampd; | |
516 | ||
517 | /* no match, now we need to find (or free up) a full page */ | |
518 | newpage = zcache_alloc_page(); | |
519 | if (newpage != NULL) | |
520 | goto create_in_new_page; | |
521 | ||
522 | zcache_failed_getfreepages++; | |
523 | /* can't allocate a page, evict an ephemeral page via LRU */ | |
524 | newpage = zcache_evict_eph_pageframe(); | |
525 | if (newpage == NULL) { | |
526 | zcache_eph_ate_tail_failed++; | |
527 | goto out; | |
528 | } | |
529 | zcache_eph_ate_tail++; | |
530 | ||
531 | create_in_new_page: | |
532 | pampd = (void *)zbud_create_prep(th, true, cdata, clen, newpage); | |
533 | BUG_ON(pampd == NULL); | |
534 | zcache_eph_pageframes = | |
535 | atomic_inc_return(&zcache_eph_pageframes_atomic); | |
536 | if (zcache_eph_pageframes > zcache_eph_pageframes_max) | |
537 | zcache_eph_pageframes_max = zcache_eph_pageframes; | |
538 | ||
539 | got_pampd: | |
540 | zcache_eph_zbytes = | |
541 | atomic_long_add_return(clen, &zcache_eph_zbytes_atomic); | |
542 | if (zcache_eph_zbytes > zcache_eph_zbytes_max) | |
543 | zcache_eph_zbytes_max = zcache_eph_zbytes; | |
544 | zcache_eph_zpages = atomic_inc_return(&zcache_eph_zpages_atomic); | |
545 | if (zcache_eph_zpages > zcache_eph_zpages_max) | |
546 | zcache_eph_zpages_max = zcache_eph_zpages; | |
547 | if (ramster_enabled && raw) | |
548 | ramster_count_foreign_pages(true, 1); | |
549 | out: | |
550 | return pampd; | |
551 | } | |
552 | ||
553 | static void *zcache_pampd_pers_create(char *data, size_t size, bool raw, | |
554 | struct tmem_handle *th) | |
555 | { | |
556 | void *pampd = NULL, *cdata = data; | |
557 | unsigned clen = size; | |
558 | struct page *page = (struct page *)(data), *newpage; | |
559 | unsigned long zbud_mean_zsize; | |
560 | unsigned long curr_pers_zpages, total_zsize; | |
561 | ||
562 | if (data == NULL) { | |
563 | BUG_ON(!ramster_enabled); | |
564 | goto create_pampd; | |
565 | } | |
566 | curr_pers_zpages = zcache_pers_zpages; | |
567 | /* FIXME CONFIG_RAMSTER... subtract atomic remote_pers_pages here? */ | |
568 | if (!raw) | |
569 | zcache_compress(page, &cdata, &clen); | |
570 | /* reject if compression is too poor */ | |
571 | if (clen > zbud_max_zsize) { | |
572 | zcache_compress_poor++; | |
573 | goto out; | |
574 | } | |
575 | /* reject if mean compression is too poor */ | |
576 | if ((clen > zbud_max_mean_zsize) && (curr_pers_zpages > 0)) { | |
577 | total_zsize = zcache_pers_zbytes; | |
578 | if ((long)total_zsize < 0) | |
579 | total_zsize = 0; | |
580 | zbud_mean_zsize = div_u64(total_zsize, | |
581 | curr_pers_zpages); | |
582 | if (zbud_mean_zsize > zbud_max_mean_zsize) { | |
583 | zcache_mean_compress_poor++; | |
584 | goto out; | |
585 | } | |
586 | } | |
587 | ||
588 | create_pampd: | |
589 | /* look for space via an existing match first */ | |
590 | pampd = (void *)zbud_match_prep(th, false, cdata, clen); | |
591 | if (pampd != NULL) | |
592 | goto got_pampd; | |
593 | ||
594 | /* no match, now we need to find (or free up) a full page */ | |
595 | newpage = zcache_alloc_page(); | |
596 | if (newpage != NULL) | |
597 | goto create_in_new_page; | |
598 | /* | |
599 | * FIXME do the following only if eph is oversized? | |
600 | * if (zcache_eph_pageframes > | |
601 | * (global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE) + | |
602 | * global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE))) | |
603 | */ | |
604 | zcache_failed_getfreepages++; | |
605 | /* can't allocate a page, evict an ephemeral page via LRU */ | |
606 | newpage = zcache_evict_eph_pageframe(); | |
607 | if (newpage == NULL) { | |
608 | zcache_pers_ate_eph_failed++; | |
609 | goto out; | |
610 | } | |
611 | zcache_pers_ate_eph++; | |
612 | ||
613 | create_in_new_page: | |
614 | pampd = (void *)zbud_create_prep(th, false, cdata, clen, newpage); | |
615 | BUG_ON(pampd == NULL); | |
616 | zcache_pers_pageframes = | |
617 | atomic_inc_return(&zcache_pers_pageframes_atomic); | |
618 | if (zcache_pers_pageframes > zcache_pers_pageframes_max) | |
619 | zcache_pers_pageframes_max = zcache_pers_pageframes; | |
620 | ||
621 | got_pampd: | |
622 | zcache_pers_zpages = atomic_inc_return(&zcache_pers_zpages_atomic); | |
623 | if (zcache_pers_zpages > zcache_pers_zpages_max) | |
624 | zcache_pers_zpages_max = zcache_pers_zpages; | |
625 | zcache_pers_zbytes = | |
626 | atomic_long_add_return(clen, &zcache_pers_zbytes_atomic); | |
627 | if (zcache_pers_zbytes > zcache_pers_zbytes_max) | |
628 | zcache_pers_zbytes_max = zcache_pers_zbytes; | |
629 | if (ramster_enabled && raw) | |
630 | ramster_count_foreign_pages(false, 1); | |
631 | out: | |
632 | return pampd; | |
633 | } | |
634 | ||
635 | /* | |
636 | * This is called directly from zcache_put_page to pre-allocate space | |
637 | * to store a zpage. | |
638 | */ | |
639 | void *zcache_pampd_create(char *data, unsigned int size, bool raw, | |
640 | int eph, struct tmem_handle *th) | |
641 | { | |
642 | void *pampd = NULL; | |
643 | struct zcache_preload *kp; | |
644 | struct tmem_objnode *objnode; | |
645 | struct tmem_obj *obj; | |
646 | int i; | |
647 | ||
648 | BUG_ON(!irqs_disabled()); | |
649 | /* pre-allocate per-cpu metadata */ | |
650 | BUG_ON(zcache_objnode_cache == NULL); | |
651 | BUG_ON(zcache_obj_cache == NULL); | |
652 | kp = &__get_cpu_var(zcache_preloads); | |
653 | for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) { | |
654 | objnode = kp->objnodes[i]; | |
655 | if (objnode == NULL) { | |
656 | objnode = kmem_cache_alloc(zcache_objnode_cache, | |
657 | ZCACHE_GFP_MASK); | |
658 | if (unlikely(objnode == NULL)) { | |
659 | zcache_failed_alloc++; | |
660 | goto out; | |
661 | } | |
662 | kp->objnodes[i] = objnode; | |
663 | } | |
664 | } | |
665 | if (kp->obj == NULL) { | |
666 | obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK); | |
667 | kp->obj = obj; | |
668 | } | |
669 | if (unlikely(kp->obj == NULL)) { | |
670 | zcache_failed_alloc++; | |
671 | goto out; | |
672 | } | |
673 | /* | |
674 | * OK, we have all the metadata pre-allocated; now do the data. | |
675 | * Since how we allocate the data depends on whether the pool is | |
676 | * ephemeral or persistent, we split the call into sub-functions here. | |
677 | */ | |
678 | if (eph) | |
679 | pampd = zcache_pampd_eph_create(data, size, raw, th); | |
680 | else | |
681 | pampd = zcache_pampd_pers_create(data, size, raw, th); | |
682 | out: | |
683 | return pampd; | |
684 | } | |
685 | ||
686 | /* | |
687 | * This is a pamops function, called via tmem_put, that is necessary to | |
688 | * "finish" a pampd creation. | |
689 | */ | |
690 | void zcache_pampd_create_finish(void *pampd, bool eph) | |
691 | { | |
692 | zbud_create_finish((struct zbudref *)pampd, eph); | |
693 | } | |
694 | ||
695 | /* | |
696 | * This is passed as a function parameter to zbud_decompress so that | |
697 | * zbud need not be familiar with the details of crypto. It assumes that | |
698 | * the bytes from from_va through from_va+size-1, and from to_va through | |
699 | * to_va+PAGE_SIZE-1, are kmapped. It must succeed, else there is a logic bug. | |
700 | */ | |
701 | static void zcache_decompress(char *from_va, unsigned int size, char *to_va) | |
702 | { | |
703 | int ret; | |
704 | unsigned int outlen = PAGE_SIZE; | |
705 | ||
706 | ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size, | |
707 | to_va, &outlen); | |
708 | BUG_ON(ret); | |
709 | BUG_ON(outlen != PAGE_SIZE); | |
710 | } | |
711 | ||
712 | /* | |
713 | * Decompress from the kernel va to a pageframe | |
714 | */ | |
715 | void zcache_decompress_to_page(char *from_va, unsigned int size, | |
716 | struct page *to_page) | |
717 | { | |
718 | char *to_va = kmap_atomic(to_page); | |
719 | zcache_decompress(from_va, size, to_va); | |
720 | kunmap_atomic(to_va); | |
721 | } | |
722 | ||
723 | /* | |
724 | * fill the pageframe corresponding to the struct page with the data | |
725 | * from the passed pampd | |
726 | */ | |
727 | static int zcache_pampd_get_data(char *data, size_t *sizep, bool raw, | |
728 | void *pampd, struct tmem_pool *pool, | |
729 | struct tmem_oid *oid, uint32_t index) | |
730 | { | |
731 | int ret; | |
732 | bool eph = !is_persistent(pool); | |
733 | ||
734 | BUG_ON(preemptible()); | |
735 | BUG_ON(eph); /* fix later if shared pools get implemented */ | |
736 | BUG_ON(pampd_is_remote(pampd)); | |
737 | if (raw) | |
738 | ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd, | |
739 | sizep, eph); | |
740 | else { | |
741 | ret = zbud_decompress((struct page *)(data), | |
742 | (struct zbudref *)pampd, false, | |
743 | zcache_decompress); | |
744 | *sizep = PAGE_SIZE; | |
745 | } | |
746 | return ret; | |
747 | } | |
748 | ||
749 | /* | |
750 | * fill the pageframe corresponding to the struct page with the data | |
751 | * from the passed pampd, then free the pampd and its space in zbud | |
752 | */ | |
753 | static int zcache_pampd_get_data_and_free(char *data, size_t *sizep, bool raw, | |
754 | void *pampd, struct tmem_pool *pool, | |
755 | struct tmem_oid *oid, uint32_t index) | |
756 | { | |
757 | int ret; | |
758 | bool eph = !is_persistent(pool); | |
759 | struct page *page = NULL; | |
760 | unsigned int zsize, zpages; | |
761 | ||
762 | BUG_ON(preemptible()); | |
763 | BUG_ON(pampd_is_remote(pampd)); | |
764 | if (raw) | |
765 | ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd, | |
766 | sizep, eph); | |
767 | else { | |
768 | ret = zbud_decompress((struct page *)(data), | |
769 | (struct zbudref *)pampd, eph, | |
770 | zcache_decompress); | |
771 | *sizep = PAGE_SIZE; | |
772 | } | |
773 | page = zbud_free_and_delist((struct zbudref *)pampd, eph, | |
774 | &zsize, &zpages); | |
775 | if (eph) { | |
776 | if (page) | |
777 | zcache_eph_pageframes = | |
778 | atomic_dec_return(&zcache_eph_pageframes_atomic); | |
779 | zcache_eph_zpages = | |
780 | atomic_sub_return(zpages, &zcache_eph_zpages_atomic); | |
781 | zcache_eph_zbytes = | |
782 | atomic_long_sub_return(zsize, &zcache_eph_zbytes_atomic); | |
783 | } else { | |
784 | if (page) | |
785 | zcache_pers_pageframes = | |
786 | atomic_dec_return(&zcache_pers_pageframes_atomic); | |
787 | zcache_pers_zpages = | |
788 | atomic_sub_return(zpages, &zcache_pers_zpages_atomic); | |
789 | zcache_pers_zbytes = | |
790 | atomic_long_sub_return(zsize, &zcache_pers_zbytes_atomic); | |
791 | } | |
792 | if (!is_local_client(pool->client)) | |
793 | ramster_count_foreign_pages(eph, -1); | |
794 | if (page) | |
795 | zcache_free_page(page); | |
796 | return ret; | |
797 | } | |
798 | ||
799 | /* | |
800 | * free the pampd and remove it from any zcache lists | |
801 | * pampd must no longer be pointed to from any tmem data structures! | |
802 | */ | |
803 | static void zcache_pampd_free(void *pampd, struct tmem_pool *pool, | |
804 | struct tmem_oid *oid, uint32_t index, bool acct) | |
805 | { | |
806 | struct page *page = NULL; | |
807 | unsigned int zsize, zpages; | |
808 | ||
809 | BUG_ON(preemptible()); | |
810 | if (pampd_is_remote(pampd)) { | |
811 | BUG_ON(!ramster_enabled); | |
812 | pampd = ramster_pampd_free(pampd, pool, oid, index, acct); | |
813 | if (pampd == NULL) | |
814 | return; | |
815 | } | |
816 | if (is_ephemeral(pool)) { | |
817 | page = zbud_free_and_delist((struct zbudref *)pampd, | |
818 | true, &zsize, &zpages); | |
819 | if (page) | |
820 | zcache_eph_pageframes = | |
821 | atomic_dec_return(&zcache_eph_pageframes_atomic); | |
822 | zcache_eph_zpages = | |
823 | atomic_sub_return(zpages, &zcache_eph_zpages_atomic); | |
824 | zcache_eph_zbytes = | |
825 | atomic_long_sub_return(zsize, &zcache_eph_zbytes_atomic); | |
826 | /* FIXME CONFIG_RAMSTER... check acct parameter? */ | |
827 | } else { | |
828 | page = zbud_free_and_delist((struct zbudref *)pampd, | |
829 | false, &zsize, &zpages); | |
830 | if (page) | |
831 | zcache_pers_pageframes = | |
832 | atomic_dec_return(&zcache_pers_pageframes_atomic); | |
833 | zcache_pers_zpages = | |
834 | atomic_sub_return(zpages, &zcache_pers_zpages_atomic); | |
835 | zcache_pers_zbytes = | |
836 | atomic_long_sub_return(zsize, &zcache_pers_zbytes_atomic); | |
837 | } | |
838 | if (!is_local_client(pool->client)) | |
839 | ramster_count_foreign_pages(is_ephemeral(pool), -1); | |
840 | if (page) | |
841 | zcache_free_page(page); | |
842 | } | |
843 | ||
844 | static struct tmem_pamops zcache_pamops = { | |
845 | .create_finish = zcache_pampd_create_finish, | |
846 | .get_data = zcache_pampd_get_data, | |
847 | .get_data_and_free = zcache_pampd_get_data_and_free, | |
848 | .free = zcache_pampd_free, | |
849 | }; | |
850 | ||
851 | /* | |
852 | * zcache compression/decompression and related per-cpu stuff | |
853 | */ | |
854 | ||
855 | static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); | |
856 | #define ZCACHE_DSTMEM_ORDER 1 | |
857 | ||
858 | static void zcache_compress(struct page *from, void **out_va, unsigned *out_len) | |
859 | { | |
860 | int ret; | |
861 | unsigned char *dmem = __get_cpu_var(zcache_dstmem); | |
862 | char *from_va; | |
863 | ||
864 | BUG_ON(!irqs_disabled()); | |
865 | /* no buffer or no compressor so can't compress */ | |
866 | BUG_ON(dmem == NULL); | |
867 | *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER; | |
868 | from_va = kmap_atomic(from); | |
869 | mb(); | |
870 | ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem, | |
871 | out_len); | |
872 | BUG_ON(ret); | |
873 | *out_va = dmem; | |
874 | kunmap_atomic(from_va); | |
875 | } | |
876 | ||
877 | static int zcache_comp_cpu_up(int cpu) | |
878 | { | |
879 | struct crypto_comp *tfm; | |
880 | ||
881 | tfm = crypto_alloc_comp(zcache_comp_name, 0, 0); | |
882 | if (IS_ERR(tfm)) | |
883 | return NOTIFY_BAD; | |
884 | *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm; | |
885 | return NOTIFY_OK; | |
886 | } | |
887 | ||
888 | static void zcache_comp_cpu_down(int cpu) | |
889 | { | |
890 | struct crypto_comp *tfm; | |
891 | ||
892 | tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu); | |
893 | crypto_free_comp(tfm); | |
894 | *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL; | |
895 | } | |
896 | ||
897 | static int zcache_cpu_notifier(struct notifier_block *nb, | |
898 | unsigned long action, void *pcpu) | |
899 | { | |
900 | int ret, i, cpu = (long)pcpu; | |
901 | struct zcache_preload *kp; | |
902 | ||
903 | switch (action) { | |
904 | case CPU_UP_PREPARE: | |
905 | ret = zcache_comp_cpu_up(cpu); | |
906 | if (ret != NOTIFY_OK) { | |
907 | pr_err("%s: can't allocate compressor xform\n", | |
908 | namestr); | |
909 | return ret; | |
910 | } | |
911 | per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( | |
912 | GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER); | |
913 | if (ramster_enabled) | |
914 | ramster_cpu_up(cpu); | |
915 | break; | |
916 | case CPU_DEAD: | |
917 | case CPU_UP_CANCELED: | |
918 | zcache_comp_cpu_down(cpu); | |
919 | free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), | |
920 | ZCACHE_DSTMEM_ORDER); | |
921 | per_cpu(zcache_dstmem, cpu) = NULL; | |
922 | kp = &per_cpu(zcache_preloads, cpu); | |
923 | for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) { | |
924 | if (kp->objnodes[i]) | |
925 | kmem_cache_free(zcache_objnode_cache, | |
926 | kp->objnodes[i]); | |
927 | } | |
928 | if (kp->obj) { | |
929 | kmem_cache_free(zcache_obj_cache, kp->obj); | |
930 | kp->obj = NULL; | |
931 | } | |
932 | if (ramster_enabled) | |
933 | ramster_cpu_down(cpu); | |
934 | break; | |
935 | default: | |
936 | break; | |
937 | } | |
938 | return NOTIFY_OK; | |
939 | } | |
940 | ||
941 | static struct notifier_block zcache_cpu_notifier_block = { | |
942 | .notifier_call = zcache_cpu_notifier | |
943 | }; | |
944 | ||
945 | /* | |
946 | * The following code interacts with the zbud eviction and zbud | |
947 | * zombify code to access LRU pages | |
948 | */ | |
949 | ||
950 | static struct page *zcache_evict_eph_pageframe(void) | |
951 | { | |
952 | struct page *page; | |
953 | unsigned int zsize = 0, zpages = 0; | |
954 | ||
955 | page = zbud_evict_pageframe_lru(&zsize, &zpages); | |
956 | if (page == NULL) | |
957 | goto out; | |
958 | zcache_eph_zbytes = atomic_long_sub_return(zsize, | |
959 | &zcache_eph_zbytes_atomic); | |
960 | zcache_eph_zpages = atomic_sub_return(zpages, | |
961 | &zcache_eph_zpages_atomic); | |
962 | zcache_evicted_eph_zpages++; | |
963 | zcache_eph_pageframes = | |
964 | atomic_dec_return(&zcache_eph_pageframes_atomic); | |
965 | zcache_evicted_eph_pageframes++; | |
966 | out: | |
967 | return page; | |
968 | } | |
969 | ||
970 | #ifdef FRONTSWAP_HAS_UNUSE | |
971 | static void unswiz(struct tmem_oid oid, u32 index, |
972 | unsigned *type, pgoff_t *offset); | |
973 | ||
974 | /* |
975 | * Choose an LRU persistent pageframe and attempt to "unuse" it by | |
976 | * calling frontswap_unuse on both zpages. | |
977 | * | |
978 | * This is work-in-progress. | |
979 | */ | |
980 | ||
981 | static int zcache_frontswap_unuse(void) | |
982 | { | |
983 | struct tmem_handle th[2]; | |
984 | int ret = -ENOMEM; | |
985 | int nzbuds, unuse_ret; | |
986 | unsigned type; | |
987 | struct page *newpage1 = NULL, *newpage2 = NULL; | |
988 | struct page *evictpage1 = NULL, *evictpage2 = NULL; | |
989 | pgoff_t offset; | |
990 | ||
991 | newpage1 = alloc_page(ZCACHE_GFP_MASK); | |
992 | newpage2 = alloc_page(ZCACHE_GFP_MASK); | |
993 | if (newpage1 == NULL) | |
994 | evictpage1 = zcache_evict_eph_pageframe(); | |
995 | if (newpage2 == NULL) | |
996 | evictpage2 = zcache_evict_eph_pageframe(); | |
997 | if (evictpage1 == NULL || evictpage2 == NULL) | |
998 | goto free_and_out; | |
999 | /* ok, we have two pages pre-allocated */ | |
1000 | nzbuds = zbud_make_zombie_lru(&th[0], NULL, NULL, false); | |
1001 | if (nzbuds == 0) { | |
1002 | ret = -ENOENT; | |
1003 | goto free_and_out; | |
1004 | } | |
1005 | unswiz(th[0].oid, th[0].index, &type, &offset); | |
1006 | unuse_ret = frontswap_unuse(type, offset, | |
1007 | newpage1 != NULL ? newpage1 : evictpage1, | |
1008 | ZCACHE_GFP_MASK); | |
1009 | if (unuse_ret != 0) | |
1010 | goto free_and_out; | |
1011 | else if (evictpage1 != NULL) | |
1012 | zcache_unacct_page(); | |
1013 | newpage1 = NULL; | |
1014 | evictpage1 = NULL; | |
1015 | if (nzbuds == 2) { | |
1016 | unswiz(th[1].oid, th[1].index, &type, &offset); | |
1017 | unuse_ret = frontswap_unuse(type, offset, | |
1018 | newpage2 != NULL ? newpage2 : evictpage2, | |
1019 | ZCACHE_GFP_MASK); | |
1020 | if (unuse_ret != 0) { | |
1021 | goto free_and_out; | |
1022 | } else if (evictpage2 != NULL) { | |
1023 | zcache_unacct_page(); | |
1024 | } | |
1025 | } | |
1026 | ret = 0; | |
1027 | goto out; | |
1028 | ||
1029 | free_and_out: | |
1030 | if (newpage1 != NULL) | |
1031 | __free_page(newpage1); | |
1032 | if (newpage2 != NULL) | |
1033 | __free_page(newpage2); | |
1034 | if (evictpage1 != NULL) | |
1035 | zcache_free_page(evictpage1); | |
1036 | if (evictpage2 != NULL) | |
1037 | zcache_free_page(evictpage2); | |
1038 | out: | |
1039 | return ret; | |
1040 | } | |
1041 | #endif | |
1042 | ||
1043 | /* | |
1044 | * When zcache is disabled ("frozen"), pools can be created and destroyed, | |
1045 | * but all puts (and thus all other operations that require memory allocation) | |
1046 | * must fail. If zcache is unfrozen, accepts some puts, and is then frozen again, | |
1047 | * data consistency requires all puts while frozen to be converted into | |
1048 | * flushes. | |
1049 | */ | |
1050 | static bool zcache_freeze; | |
1051 | ||
1052 | /* | |
1053 | * This zcache shrinker interface reduces the number of ephemeral pageframes | |
1054 | * used by zcache to approximately the same as the total number of LRU_FILE | |
1055 | * pageframes in use. | |
1056 | */ | |
1057 | static int shrink_zcache_memory(struct shrinker *shrink, | |
1058 | struct shrink_control *sc) | |
1059 | { | |
1060 | static bool in_progress; | |
1061 | int ret = -1; | |
1062 | int nr = sc->nr_to_scan; | |
1063 | int nr_evict = 0; | |
1064 | int nr_unuse = 0; | |
1065 | struct page *page; | |
1066 | #ifdef FRONTSWAP_HAS_UNUSE | |
1067 | int unuse_ret; | |
1068 | #endif | |
1069 | |
1070 | if (nr <= 0) | |
1071 | goto skip_evict; | |
1072 | ||
1073 | /* don't allow more than one eviction thread at a time */ | |
1074 | if (in_progress) | |
1075 | goto skip_evict; | |
1076 | ||
1077 | in_progress = true; | |
1078 | ||
1079 | /* we are going to ignore nr, and target a different value */ | |
1080 | zcache_last_active_file_pageframes = | |
1081 | global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); | |
1082 | zcache_last_inactive_file_pageframes = | |
1083 | global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); | |
1084 | nr_evict = zcache_eph_pageframes - zcache_last_active_file_pageframes + | |
1085 | zcache_last_inactive_file_pageframes; | |
1086 | while (nr_evict-- > 0) { | |
1087 | page = zcache_evict_eph_pageframe(); | |
1088 | if (page == NULL) | |
1089 | break; | |
1090 | zcache_free_page(page); | |
1091 | } | |
1092 | ||
1093 | zcache_last_active_anon_pageframes = | |
1094 | global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON); | |
1095 | zcache_last_inactive_anon_pageframes = | |
1096 | global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON); | |
1097 | nr_unuse = zcache_pers_pageframes - zcache_last_active_anon_pageframes + | |
1098 | zcache_last_inactive_anon_pageframes; | |
1099 | #ifdef FRONTSWAP_HAS_UNUSE | |
1100 | /* rate limit for testing */ | |
1101 | if (nr_unuse > 32) | |
1102 | nr_unuse = 32; | |
1103 | while (nr_unuse-- > 0) { | |
1104 | unuse_ret = zcache_frontswap_unuse(); | |
1105 | if (unuse_ret == -ENOMEM) | |
1106 | break; | |
1107 | } | |
1108 | #endif | |
1109 | in_progress = false; | |
1110 | ||
1111 | skip_evict: | |
1112 | /* resample: the counts have changed, but maybe not all the way yet */ | |
1113 | zcache_last_active_file_pageframes = | |
1114 | global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); | |
1115 | zcache_last_inactive_file_pageframes = | |
1116 | global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); | |
1117 | ret = zcache_eph_pageframes - zcache_last_active_file_pageframes + | |
1118 | zcache_last_inactive_file_pageframes; | |
1119 | if (ret < 0) | |
1120 | ret = 0; | |
1121 | return ret; | |
1122 | } | |
1123 | ||
1124 | static struct shrinker zcache_shrinker = { | |
1125 | .shrink = shrink_zcache_memory, | |
1126 | .seeks = DEFAULT_SEEKS, | |
1127 | }; | |
1128 | ||
1129 | /* | |
1130 | * zcache shims between cleancache/frontswap ops and tmem | |
1131 | */ | |
1132 | ||
1133 | /* FIXME rename these core routines to zcache_tmemput etc? */ | |
1134 | int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp, | |
1135 | uint32_t index, void *page, | |
1136 | unsigned int size, bool raw, int ephemeral) | |
1137 | { | |
1138 | struct tmem_pool *pool; | |
1139 | struct tmem_handle th; | |
1140 | int ret = -1; | |
1141 | void *pampd = NULL; | |
1142 | ||
1143 | BUG_ON(!irqs_disabled()); | |
1144 | pool = zcache_get_pool_by_id(cli_id, pool_id); | |
1145 | if (unlikely(pool == NULL)) | |
1146 | goto out; | |
1147 | if (!zcache_freeze) { | |
1148 | ret = 0; | |
1149 | th.client_id = cli_id; | |
1150 | th.pool_id = pool_id; | |
1151 | th.oid = *oidp; | |
1152 | th.index = index; | |
1153 | pampd = zcache_pampd_create((char *)page, size, raw, | |
1154 | ephemeral, &th); | |
1155 | if (pampd == NULL) { | |
1156 | ret = -ENOMEM; | |
1157 | if (ephemeral) | |
1158 | zcache_failed_eph_puts++; | |
1159 | else | |
1160 | zcache_failed_pers_puts++; | |
1161 | } else { | |
1162 | if (ramster_enabled) | |
1163 | ramster_do_preload_flnode(pool); | |
1164 | ret = tmem_put(pool, oidp, index, 0, pampd); | |
1165 | if (ret < 0) | |
1166 | BUG(); | |
1167 | } | |
1168 | zcache_put_pool(pool); | |
1169 | } else { | |
1170 | zcache_put_to_flush++; | |
1171 | if (ramster_enabled) | |
1172 | ramster_do_preload_flnode(pool); | |
1173 | if (atomic_read(&pool->obj_count) > 0) | |
1174 | /* the put fails whether the flush succeeds or not */ | |
1175 | (void)tmem_flush_page(pool, oidp, index); | |
1176 | zcache_put_pool(pool); | |
1177 | } | |
1178 | out: | |
1179 | return ret; | |
1180 | } | |
1181 | ||
1182 | int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp, | |
1183 | uint32_t index, void *page, | |
1184 | size_t *sizep, bool raw, int get_and_free) | |
1185 | { | |
1186 | struct tmem_pool *pool; | |
1187 | int ret = -1; | |
1188 | bool eph; | |
1189 | ||
1190 | if (!raw) { | |
1191 | BUG_ON(irqs_disabled()); | |
1192 | BUG_ON(in_softirq()); | |
1193 | } | |
1194 | pool = zcache_get_pool_by_id(cli_id, pool_id); | |
1195 | if (likely(pool != NULL)) { | |
1196 | eph = is_ephemeral(pool); | |
1197 | if (atomic_read(&pool->obj_count) > 0) | |
1198 | ret = tmem_get(pool, oidp, index, (char *)(page), | |
1199 | sizep, raw, get_and_free); | |
1200 | zcache_put_pool(pool); | |
1201 | } | |
1202 | WARN_ONCE((pool != NULL && !is_ephemeral(pool) && (ret != 0)), | |
1203 | "zcache_get fails on persistent pool, " | |
1204 | "bad things are very likely to happen soon\n"); | |
1205 | #ifdef RAMSTER_TESTING | |
1206 | if (ret != 0 && ret != -1 && !(ret == -EINVAL && is_ephemeral(pool))) | |
1207 | pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret); | |
1208 | #endif | |
1209 | return ret; | |
1210 | } | |
1211 | ||
1212 | int zcache_flush_page(int cli_id, int pool_id, | |
1213 | struct tmem_oid *oidp, uint32_t index) | |
1214 | { | |
1215 | struct tmem_pool *pool; | |
1216 | int ret = -1; | |
1217 | unsigned long flags; | |
1218 | ||
1219 | local_irq_save(flags); | |
1220 | zcache_flush_total++; | |
1221 | pool = zcache_get_pool_by_id(cli_id, pool_id); | |
1222 | if (ramster_enabled) | |
1223 | ramster_do_preload_flnode(pool); | |
1224 | if (likely(pool != NULL)) { | |
1225 | if (atomic_read(&pool->obj_count) > 0) | |
1226 | ret = tmem_flush_page(pool, oidp, index); | |
1227 | zcache_put_pool(pool); | |
1228 | } | |
1229 | if (ret >= 0) | |
1230 | zcache_flush_found++; | |
1231 | local_irq_restore(flags); | |
1232 | return ret; | |
1233 | } | |
1234 | ||
1235 | int zcache_flush_object(int cli_id, int pool_id, | |
1236 | struct tmem_oid *oidp) | |
1237 | { | |
1238 | struct tmem_pool *pool; | |
1239 | int ret = -1; | |
1240 | unsigned long flags; | |
1241 | ||
1242 | local_irq_save(flags); | |
1243 | zcache_flobj_total++; | |
1244 | pool = zcache_get_pool_by_id(cli_id, pool_id); | |
1245 | if (ramster_enabled) | |
1246 | ramster_do_preload_flnode(pool); | |
1247 | if (likely(pool != NULL)) { | |
1248 | if (atomic_read(&pool->obj_count) > 0) | |
1249 | ret = tmem_flush_object(pool, oidp); | |
1250 | zcache_put_pool(pool); | |
1251 | } | |
1252 | if (ret >= 0) | |
1253 | zcache_flobj_found++; | |
1254 | local_irq_restore(flags); | |
1255 | return ret; | |
1256 | } | |
1257 | ||
1258 | static int zcache_client_destroy_pool(int cli_id, int pool_id) | |
1259 | { | |
1260 | struct tmem_pool *pool = NULL; | |
1261 | struct zcache_client *cli = NULL; | |
1262 | int ret = -1; | |
1263 | ||
1264 | if (pool_id < 0) | |
1265 | goto out; | |
1266 | if (cli_id == LOCAL_CLIENT) | |
1267 | cli = &zcache_host; | |
1268 | else if ((unsigned int)cli_id < MAX_CLIENTS) | |
1269 | cli = &zcache_clients[cli_id]; | |
1270 | if (cli == NULL) | |
1271 | goto out; | |
1272 | atomic_inc(&cli->refcount); | |
1273 | pool = cli->tmem_pools[pool_id]; | |
1274 | if (pool == NULL) | |
1275 | goto out; | |
1276 | cli->tmem_pools[pool_id] = NULL; | |
1277 | /* wait for pool activity on other cpus to quiesce */ | |
1278 | while (atomic_read(&pool->refcount) != 0) | |
1279 | ; | |
1280 | atomic_dec(&cli->refcount); | |
1281 | local_bh_disable(); | |
1282 | ret = tmem_destroy_pool(pool); | |
1283 | local_bh_enable(); | |
1284 | kfree(pool); | |
1285 | if (cli_id == LOCAL_CLIENT) | |
1286 | pr_info("%s: destroyed local pool id=%d\n", namestr, pool_id); | |
1287 | else | |
1288 | pr_info("%s: destroyed pool id=%d, client=%d\n", | |
1289 | namestr, pool_id, cli_id); | |
1290 | out: | |
1291 | return ret; | |
1292 | } | |
1293 | ||
1294 | int zcache_new_pool(uint16_t cli_id, uint32_t flags) | |
1295 | { | |
1296 | int poolid = -1; | |
1297 | struct tmem_pool *pool; | |
1298 | struct zcache_client *cli = NULL; | |
1299 | ||
1300 | if (cli_id == LOCAL_CLIENT) | |
1301 | cli = &zcache_host; | |
1302 | else if ((unsigned int)cli_id < MAX_CLIENTS) | |
1303 | cli = &zcache_clients[cli_id]; | |
1304 | if (cli == NULL) | |
1305 | goto out; | |
1306 | atomic_inc(&cli->refcount); | |
1307 | pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC); | |
1308 | if (pool == NULL) { | |
1309 | pr_info("%s: pool creation failed: out of memory\n", namestr); | |
1310 | goto out; | |
1311 | } | |
1312 | ||
1313 | for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++) | |
1314 | if (cli->tmem_pools[poolid] == NULL) | |
1315 | break; | |
1316 | if (poolid >= MAX_POOLS_PER_CLIENT) { | |
1317 | pr_info("%s: pool creation failed: max exceeded\n", namestr); | |
1318 | kfree(pool); | |
1319 | poolid = -1; | |
1320 | goto out; | |
1321 | } | |
1322 | atomic_set(&pool->refcount, 0); | |
1323 | pool->client = cli; | |
1324 | pool->pool_id = poolid; | |
1325 | tmem_new_pool(pool, flags); | |
1326 | cli->tmem_pools[poolid] = pool; | |
1327 | if (cli_id == LOCAL_CLIENT) | |
1328 | pr_info("%s: created %s local tmem pool, id=%d\n", namestr, | |
1329 | flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1330 | poolid); | |
1331 | else | |
1332 | pr_info("%s: created %s tmem pool, id=%d, client=%d\n", namestr, | |
1333 | flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1334 | poolid, cli_id); | |
1335 | out: | |
1336 | if (cli != NULL) | |
1337 | atomic_dec(&cli->refcount); | |
1338 | return poolid; | |
1339 | } | |
1340 | ||
1341 | static int zcache_local_new_pool(uint32_t flags) | |
1342 | { | |
1343 | return zcache_new_pool(LOCAL_CLIENT, flags); | |
1344 | } | |
1345 | ||
1346 | int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph) | |
1347 | { |
1348 | struct tmem_pool *pool; | |
1349 | struct zcache_client *cli; | |
1350 | uint32_t flags = eph ? 0 : TMEM_POOL_PERSIST; |
1351 | int ret = -1; | |
1352 | ||
1353 | BUG_ON(!ramster_enabled); | |
1354 | if (cli_id == LOCAL_CLIENT) | |
1355 | goto out; | |
1356 | if (pool_id >= MAX_POOLS_PER_CLIENT) | |
1357 | goto out; | |
1358 | if (cli_id >= MAX_CLIENTS) |
1359 | goto out; | |
1360 | ||
1361 | cli = &zcache_clients[cli_id]; | |
1362 | if ((eph && disable_cleancache) || (!eph && disable_frontswap)) { |
1363 | pr_err("zcache_autocreate_pool: pool type disabled\n"); | |
1364 | goto out; | |
1365 | } | |
1366 | if (!cli->allocated) { | |
1367 | if (zcache_new_client(cli_id)) { | |
1368 | pr_err("zcache_autocreate_pool: can't create client\n"); | |
1369 | goto out; | |
1370 | } | |
1371 | cli = &zcache_clients[cli_id]; | |
1372 | } | |
1373 | atomic_inc(&cli->refcount); | |
1374 | pool = cli->tmem_pools[pool_id]; | |
1375 | if (pool != NULL) { | |
1376 | if (pool->persistent && eph) { | |
1377 | pr_err("zcache_autocreate_pool: type mismatch\n"); | |
1378 | goto out; | |
1379 | } | |
1380 | ret = 0; | |
1381 | goto out; | |
1382 | } | |
1383 | pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL); | |
1384 | if (pool == NULL) { | |
1385 | pr_info("%s: pool creation failed: out of memory\n", namestr); | |
1386 | goto out; | |
1387 | } | |
1388 | atomic_set(&pool->refcount, 0); | |
1389 | pool->client = cli; | |
1390 | pool->pool_id = pool_id; | |
1391 | tmem_new_pool(pool, flags); | |
1392 | cli->tmem_pools[pool_id] = pool; | |
1393 | pr_info("%s: AUTOcreated %s tmem poolid=%d, for remote client=%d\n", | |
1394 | namestr, flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1395 | pool_id, cli_id); | |
1396 | ret = 0; | |
1397 | out: | |
1398 | if (cli != NULL) | |
1399 | atomic_dec(&cli->refcount); | |
1400 | return ret; | |
1401 | } | |
1402 | ||
1403 | /********** | |
1404 | * Two kernel functionalities can currently be layered on top of tmem. | |
1405 | * These are "cleancache" which is used as a second-chance cache for clean | |
1406 | * page cache pages; and "frontswap" which is used for swap pages | |
1407 | * to avoid writes to disk. A generic "shim" is provided here for each | |
1408 | * to translate in-kernel semantics to zcache semantics. | |
1409 | */ | |
1410 | ||
1411 | static void zcache_cleancache_put_page(int pool_id, | |
1412 | struct cleancache_filekey key, | |
1413 | pgoff_t index, struct page *page) | |
1414 | { | |
1415 | u32 ind = (u32) index; | |
1416 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1417 | ||
1418 | if (!disable_cleancache_ignore_nonactive && !PageWasActive(page)) { | |
1419 | zcache_eph_nonactive_puts_ignored++; | |
1420 | return; | |
1421 | } | |
1422 | if (likely(ind == index)) | |
1423 | (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, | |
1424 | page, PAGE_SIZE, false, 1); | |
1425 | } | |
1426 | ||
1427 | static int zcache_cleancache_get_page(int pool_id, | |
1428 | struct cleancache_filekey key, | |
1429 | pgoff_t index, struct page *page) | |
1430 | { | |
1431 | u32 ind = (u32) index; | |
1432 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1433 | size_t size; | |
1434 | int ret = -1; | |
1435 | ||
1436 | if (likely(ind == index)) { | |
1437 | ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, | |
1438 | page, &size, false, 0); | |
1439 | BUG_ON(ret >= 0 && size != PAGE_SIZE); | |
1440 | if (ret == 0) | |
1441 | SetPageWasActive(page); | |
1442 | } | |
1443 | return ret; | |
1444 | } | |
1445 | ||
1446 | static void zcache_cleancache_flush_page(int pool_id, | |
1447 | struct cleancache_filekey key, | |
1448 | pgoff_t index) | |
1449 | { | |
1450 | u32 ind = (u32) index; | |
1451 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1452 | ||
1453 | if (likely(ind == index)) | |
1454 | (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind); | |
1455 | } | |
1456 | ||
1457 | static void zcache_cleancache_flush_inode(int pool_id, | |
1458 | struct cleancache_filekey key) | |
1459 | { | |
1460 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1461 | ||
1462 | (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid); | |
1463 | } | |
1464 | ||
1465 | static void zcache_cleancache_flush_fs(int pool_id) | |
1466 | { | |
1467 | if (pool_id >= 0) | |
1468 | (void)zcache_client_destroy_pool(LOCAL_CLIENT, pool_id); | |
1469 | } | |
1470 | ||
1471 | static int zcache_cleancache_init_fs(size_t pagesize) | |
1472 | { | |
1473 | BUG_ON(sizeof(struct cleancache_filekey) != | |
1474 | sizeof(struct tmem_oid)); | |
1475 | BUG_ON(pagesize != PAGE_SIZE); | |
1476 | return zcache_local_new_pool(0); | |
1477 | } | |
1478 | ||
1479 | static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) | |
1480 | { | |
1481 | /* shared pools are unsupported and map to private */ | |
1482 | BUG_ON(sizeof(struct cleancache_filekey) != | |
1483 | sizeof(struct tmem_oid)); | |
1484 | BUG_ON(pagesize != PAGE_SIZE); | |
1485 | return zcache_local_new_pool(0); | |
1486 | } | |
1487 | ||
1488 | static struct cleancache_ops zcache_cleancache_ops = { | |
1489 | .put_page = zcache_cleancache_put_page, | |
1490 | .get_page = zcache_cleancache_get_page, | |
1491 | .invalidate_page = zcache_cleancache_flush_page, | |
1492 | .invalidate_inode = zcache_cleancache_flush_inode, | |
1493 | .invalidate_fs = zcache_cleancache_flush_fs, | |
1494 | .init_shared_fs = zcache_cleancache_init_shared_fs, | |
1495 | .init_fs = zcache_cleancache_init_fs | |
1496 | }; | |
1497 | ||
1498 | struct cleancache_ops zcache_cleancache_register_ops(void) | |
1499 | { | |
1500 | struct cleancache_ops old_ops = | |
1501 | cleancache_register_ops(&zcache_cleancache_ops); | |
1502 | ||
1503 | return old_ops; | |
1504 | } | |
1505 | ||
1506 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | |
1507 | static int zcache_frontswap_poolid __read_mostly = -1; | |
1508 | ||
1509 | /* | |
1510 |  * Swizzling increases the number of objects per swaptype, which increases | |
1511 |  * tmem concurrency under heavy swap loads.  Larger nr_cpus may later warrant | |
1512 |  * a larger SWIZ_BITS.  Setting SWIZ_BITS to 27 basically reconstructs the | |
1513 |  * swap entry from frontswap_get_page(), but has side-effects; hence 8 is used. | |
1514 |  */ | |
1515 | #define SWIZ_BITS 8 | |
1516 | #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) | |
1517 | #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) | |
1518 | #define iswiz(_ind) (_ind >> SWIZ_BITS) | |
1519 | ||
1520 | static inline struct tmem_oid oswiz(unsigned type, u32 ind) | |
1521 | { | |
1522 | struct tmem_oid oid = { .oid = { 0 } }; | |
1523 | oid.oid[0] = _oswiz(type, ind); | |
1524 | return oid; | |
1525 | } | |
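
/*
 * Illustrative example (editor's sketch, not part of the original driver):
 * with SWIZ_BITS = 8, a frontswap page of swap type 2 at offset 0x12345 is
 * swizzled as
 *   oswiz(2, 0x12345).oid[0] = (2 << 8) | (0x12345 & 0xff) = 0x245
 *   iswiz(0x12345)           = 0x12345 >> 8               = 0x123
 * and unswiz() below recovers (2, 0x12345) from (0x245, 0x123).  The low
 * SWIZ_BITS of the offset select one of 256 tmem objects per swap type and
 * the remaining bits become the per-object index.
 */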
1526 | ||
1527 | #ifdef FRONTSWAP_HAS_UNUSE | |
1528 | static void unswiz(struct tmem_oid oid, u32 index, |
1529 | unsigned *type, pgoff_t *offset) | |
1530 | { | |
1531 | *type = (unsigned)(oid.oid[0] >> SWIZ_BITS); | |
1532 | *offset = (pgoff_t)((index << SWIZ_BITS) | | |
1533 | (oid.oid[0] & SWIZ_MASK)); | |
1534 | } | |
1535 | #endif | |
1536 | |
1537 | static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, | |
1538 | struct page *page) | |
1539 | { | |
1540 | u64 ind64 = (u64)offset; | |
1541 | u32 ind = (u32)offset; | |
1542 | struct tmem_oid oid = oswiz(type, ind); | |
1543 | int ret = -1; | |
1544 | unsigned long flags; | |
1545 | |
1546 | BUG_ON(!PageLocked(page)); | |
1547 | if (!disable_frontswap_ignore_nonactive && !PageWasActive(page)) { | |
1548 | zcache_pers_nonactive_puts_ignored++; | |
1549 | ret = -ERANGE; | |
1550 | goto out; | |
1551 | } | |
1552 | if (likely(ind64 == ind)) { | |
1553 | local_irq_save(flags); | |
1554 | ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid, | |
1555 | &oid, iswiz(ind), | |
1556 | page, PAGE_SIZE, false, 0); | |
1557 | local_irq_restore(flags); | |
1558 | } | |
1559 | out: | |
1560 | return ret; | |
1561 | } | |
1562 | ||
1563 | /* Returns 0 if the page was successfully gotten from frontswap, or -1 | |
1564 |  * if it was not present (should never happen!). */ | |
1565 | static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, | |
1566 | struct page *page) | |
1567 | { | |
1568 | u64 ind64 = (u64)offset; | |
1569 | u32 ind = (u32)offset; | |
1570 | struct tmem_oid oid = oswiz(type, ind); | |
1571 | size_t size; | |
1572 | int ret = -1, get_and_free; | |
1573 | ||
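/*
 * With exclusive gets, a successful load also removes the page from tmem
 * (get_and_free == 1); otherwise the compressed copy is left in place (-1).
 */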
1574 | if (frontswap_has_exclusive_gets) | |
1575 | get_and_free = 1; | |
1576 | else | |
1577 | get_and_free = -1; | |
1578 | BUG_ON(!PageLocked(page)); | |
1579 | if (likely(ind64 == ind)) { | |
1580 | ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid, | |
1581 | &oid, iswiz(ind), | |
1582 | page, &size, false, get_and_free); | |
1583 | BUG_ON(ret >= 0 && size != PAGE_SIZE); | |
1584 | } | |
1585 | return ret; | |
1586 | } | |
1587 | ||
1588 | /* flush a single page from frontswap */ | |
1589 | static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) | |
1590 | { | |
1591 | u64 ind64 = (u64)offset; | |
1592 | u32 ind = (u32)offset; | |
1593 | struct tmem_oid oid = oswiz(type, ind); | |
1594 | ||
1595 | if (likely(ind64 == ind)) | |
1596 | (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid, | |
1597 | &oid, iswiz(ind)); | |
1598 | } | |
1599 | ||
1600 | /* flush all pages from the passed swaptype */ | |
1601 | static void zcache_frontswap_flush_area(unsigned type) | |
1602 | { | |
1603 | struct tmem_oid oid; | |
1604 | int ind; | |
1605 | ||
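/*
 * All offsets for this swap type were swizzled into one of SWIZ_MASK + 1
 * (256 with SWIZ_BITS == 8) objects, so flushing each object flushes the
 * entire area.
 */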
1606 | for (ind = SWIZ_MASK; ind >= 0; ind--) { | |
1607 | oid = oswiz(type, ind); | |
1608 | (void)zcache_flush_object(LOCAL_CLIENT, | |
1609 | zcache_frontswap_poolid, &oid); | |
1610 | } | |
1611 | } | |
1612 | ||
1613 | static void zcache_frontswap_init(unsigned ignored) | |
1614 | { | |
1615 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | |
1616 | if (zcache_frontswap_poolid < 0) | |
1617 | zcache_frontswap_poolid = | |
1618 | zcache_local_new_pool(TMEM_POOL_PERSIST); | |
1619 | } | |
1620 | ||
1621 | static struct frontswap_ops zcache_frontswap_ops = { | |
1622 | .store = zcache_frontswap_put_page, | |
1623 | .load = zcache_frontswap_get_page, | |
1624 | .invalidate_page = zcache_frontswap_flush_page, | |
1625 | .invalidate_area = zcache_frontswap_flush_area, | |
1626 | .init = zcache_frontswap_init | |
1627 | }; | |
1628 | ||
1629 | struct frontswap_ops zcache_frontswap_register_ops(void) | |
1630 | { | |
1631 | struct frontswap_ops old_ops = | |
1632 | frontswap_register_ops(&zcache_frontswap_ops); | |
1633 | ||
1634 | return old_ops; | |
1635 | } | |
1636 | ||
1637 | /* | |
1638 | * zcache initialization | |
1639 | * NOTE: for now, "zcache" or "ramster" MUST be provided as a kernel boot | |
1640 | * parameter, or nothing happens! | |
1641 | */ | |
1642 | ||
1643 | static int __init enable_zcache(char *s) | |
1644 | { | |
1645 | zcache_enabled = 1; | |
1646 | return 1; | |
1647 | } | |
1648 | __setup("zcache", enable_zcache); | |
1649 | ||
1650 | static int __init enable_ramster(char *s) | |
1651 | { | |
1652 | zcache_enabled = 1; | |
1653 | #ifdef CONFIG_RAMSTER | |
1654 | ramster_enabled = 1; | |
1655 | #endif | |
1656 | return 1; | |
1657 | } | |
1658 | __setup("ramster", enable_ramster); | |
1659 | ||
1660 | /* allow independent dynamic disabling of cleancache and frontswap */ | |
1661 | ||
1662 | static int __init no_cleancache(char *s) | |
1663 | { | |
1664 | disable_cleancache = 1; | |
1665 | return 1; | |
1666 | } | |
1667 | ||
1668 | __setup("nocleancache", no_cleancache); | |
1669 | ||
1670 | static int __init no_frontswap(char *s) | |
1671 | { | |
1672 | disable_frontswap = 1; | |
1673 | return 1; | |
1674 | } | |
1675 | ||
1676 | __setup("nofrontswap", no_frontswap); | |
1677 | ||
1678 | static int __init no_frontswap_exclusive_gets(char *s) | |
1679 | { | |
1680 | frontswap_has_exclusive_gets = false; | |
1681 | return 1; | |
1682 | } | |
1683 | ||
1684 | __setup("nofrontswapexclusivegets", no_frontswap_exclusive_gets); | |
1685 | ||
1686 | static int __init no_frontswap_ignore_nonactive(char *s) | |
1687 | { | |
1688 | disable_frontswap_ignore_nonactive = 1; | |
1689 | return 1; | |
1690 | } | |
1691 | ||
1692 | __setup("nofrontswapignorenonactive", no_frontswap_ignore_nonactive); | |
1693 | ||
1694 | static int __init no_cleancache_ignore_nonactive(char *s) | |
1695 | { | |
1696 | disable_cleancache_ignore_nonactive = 1; | |
1697 | return 1; | |
1698 | } | |
1699 | ||
1700 | __setup("nocleancacheignorenonactive", no_cleancache_ignore_nonactive); | |
1701 | ||
1702 | static int __init enable_zcache_compressor(char *s) | |
1703 | { | |
1704 | strlcpy(zcache_comp_name, s, sizeof(zcache_comp_name)); | |
1705 | zcache_enabled = 1; | |
1706 | return 1; | |
1707 | } | |
1708 | __setup("zcache=", enable_zcache_compressor); | |
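
/*
 * Illustrative boot-parameter usage (editor's sketch based on the __setup
 * hooks above; the compressor names are examples and must be algorithms
 * known to the crypto API, see zcache_comp_init()):
 *
 *   zcache                      enable zcache with the default compressor (lzo)
 *   zcache=deflate              enable zcache with an alternate crypto_comp name
 *   ramster                     enable ramster mode (requires CONFIG_RAMSTER)
 *   zcache nofrontswap          enable zcache for cleancache only
 *   zcache nofrontswapexclusivegets
 *                               keep pages in tmem after a frontswap load
 */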
1709 | ||
1710 | ||
1711 | static int __init zcache_comp_init(void) | |
1712 | { | |
1713 | int ret = 0; | |
1714 | ||
1715 | /* check crypto algorithm */ | |
1716 | if (*zcache_comp_name != '\0') { | |
1717 | ret = crypto_has_comp(zcache_comp_name, 0, 0); | |
1718 | if (!ret) | |
1719 | pr_info("zcache: %s not supported\n", | |
1720 | zcache_comp_name); | |
1721 | } | |
1722 | if (!ret) | |
1723 | strcpy(zcache_comp_name, "lzo"); | |
1724 | ret = crypto_has_comp(zcache_comp_name, 0, 0); | |
1725 | if (!ret) { | |
1726 | ret = 1; | |
1727 | goto out; | |
1728 | } | |
1729 | pr_info("zcache: using %s compressor\n", zcache_comp_name); | |
1730 | ||
1731 | /* alloc percpu transforms */ | |
1732 | ret = 0; | |
1733 | zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); | |
1734 | if (!zcache_comp_pcpu_tfms) | |
1735 | ret = 1; | |
1736 | out: | |
1737 | return ret; | |
1738 | } | |
1739 | ||
1740 | static int __init zcache_init(void) | |
1741 | { | |
1742 | int ret = 0; | |
1743 | ||
1744 | if (ramster_enabled) { | |
1745 | namestr = "ramster"; | |
1746 | ramster_register_pamops(&zcache_pamops); | |
1747 | } | |
1748 | #ifdef CONFIG_DEBUG_FS | |
1749 | zcache_debugfs_init(); | |
1750 | #endif | |
1751 | if (zcache_enabled) { | |
1752 | unsigned int cpu; | |
1753 | ||
1754 | tmem_register_hostops(&zcache_hostops); | |
1755 | tmem_register_pamops(&zcache_pamops); | |
1756 | ret = register_cpu_notifier(&zcache_cpu_notifier_block); | |
1757 | if (ret) { | |
1758 | pr_err("%s: can't register cpu notifier\n", namestr); | |
1759 | goto out; | |
1760 | } | |
1761 | ret = zcache_comp_init(); | |
1762 | if (ret) { | |
1763 | pr_err("%s: compressor initialization failed\n", | |
1764 | namestr); | |
1765 | goto out; | |
1766 | } | |
1767 | for_each_online_cpu(cpu) { | |
1768 | void *pcpu = (void *)(long)cpu; | |
1769 | zcache_cpu_notifier(&zcache_cpu_notifier_block, | |
1770 | CPU_UP_PREPARE, pcpu); | |
1771 | } | |
1772 | } | |
1773 | zcache_objnode_cache = kmem_cache_create("zcache_objnode", | |
1774 | sizeof(struct tmem_objnode), 0, 0, NULL); | |
1775 | zcache_obj_cache = kmem_cache_create("zcache_obj", | |
1776 | sizeof(struct tmem_obj), 0, 0, NULL); | |
1777 | ret = zcache_new_client(LOCAL_CLIENT); | |
1778 | if (ret) { | |
1779 | pr_err("%s: can't create client\n", namestr); | |
1780 | goto out; | |
1781 | } | |
1782 | zbud_init(); | |
1783 | if (zcache_enabled && !disable_cleancache) { | |
1784 | struct cleancache_ops old_ops; | |
1785 | ||
1786 | register_shrinker(&zcache_shrinker); | |
1787 | old_ops = zcache_cleancache_register_ops(); | |
1788 | pr_info("%s: cleancache enabled using kernel transcendent " | |
1789 | "memory and compression buddies\n", namestr); | |
1790 | #ifdef ZCACHE_DEBUG | |
1791 | pr_info("%s: cleancache: ignorenonactive = %d\n", | |
1792 | namestr, !disable_cleancache_ignore_nonactive); | |
1793 | #endif | |
1794 | if (old_ops.init_fs != NULL) | |
1795 | pr_warn("%s: cleancache_ops overridden\n", namestr); | |
1796 | } | |
1797 | if (zcache_enabled && !disable_frontswap) { | |
1798 | struct frontswap_ops old_ops; | |
1799 | ||
1800 | old_ops = zcache_frontswap_register_ops(); | |
1801 | if (frontswap_has_exclusive_gets) | |
1802 | frontswap_tmem_exclusive_gets(true); | |
1803 | pr_info("%s: frontswap enabled using kernel transcendent " | |
1804 | "memory and compression buddies\n", namestr); | |
1805 | #ifdef ZCACHE_DEBUG | |
1806 | pr_info("%s: frontswap: excl gets = %d active only = %d\n", | |
1807 | namestr, frontswap_has_exclusive_gets, | |
1808 | !disable_frontswap_ignore_nonactive); | |
1809 | #endif | |
1810 | if (old_ops.init != NULL) | |
1811 | pr_warn("%s: frontswap_ops overridden\n", namestr); | |
1812 | } | |
1813 | if (ramster_enabled) | |
1814 | ramster_init(!disable_cleancache, !disable_frontswap, | |
1815 | frontswap_has_exclusive_gets); | |
1816 | out: | |
1817 | return ret; | |
1818 | } | |
1819 | ||
1820 | late_initcall(zcache_init); |