1 | /* |
2 | * zcache.c | |
3 | * | |
4 | * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp. | |
5 | * Copyright (c) 2010,2011, Nitin Gupta | |
6 | * | |
7 | * Zcache provides an in-kernel "host implementation" for transcendent memory | |
8 | * and, thus indirectly, for cleancache and frontswap. Zcache includes two | |
9 | * page-accessible memory [1] interfaces, both utilizing lzo1x compression: | |
10 | * 1) "compression buddies" ("zbud") is used for ephemeral pages | |
11 | * 2) xvmalloc is used for persistent pages. | |
12 | * Xvmalloc (based on the TLSF allocator) has very low fragmentation | |
13 | * so it maximizes space efficiency, while zbud allows pairs of (and | |
14 | * possibly, in the future, more than two) compressed pages to be closely linked | |
15 | * so that reclaiming can be done via the kernel's physical-page-oriented | |
16 | * "shrinker" interface. | |
17 | * | |
18 | * [1] For a definition of page-accessible memory (aka PAM), see: | |
19 | * http://marc.info/?l=linux-mm&m=127811271605009 | |
20 | */ | |
21 | ||
22 | #include <linux/cpu.h> | |
23 | #include <linux/highmem.h> | |
24 | #include <linux/list.h> | |
25 | #include <linux/lzo.h> | |
26 | #include <linux/slab.h> | |
27 | #include <linux/spinlock.h> | |
28 | #include <linux/types.h> | |
29 | #include <linux/atomic.h> | |
30 | #include "tmem.h" | |
31 | ||
32 | #include "../zram/xvmalloc.h" /* if built in drivers/staging */ | |
33 | ||
34 | #if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP)) | |
35 | #error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP" | |
36 | #endif | |
37 | #ifdef CONFIG_CLEANCACHE | |
38 | #include <linux/cleancache.h> | |
39 | #endif | |
40 | #ifdef CONFIG_FRONTSWAP | |
41 | #include <linux/frontswap.h> | |
42 | #endif | |
43 | ||
44 | #if 0 | |
45 | /* this is more aggressive but may cause other problems? */ | |
46 | #define ZCACHE_GFP_MASK (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN) | |
47 | #else | |
48 | #define ZCACHE_GFP_MASK \ | |
49 | (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) | |
50 | #endif | |
51 | ||
52 | /********** | |
53 | * Compression buddies ("zbud") provides for packing two (or, possibly | |
54 | * in the future, more) compressed ephemeral pages into a single "raw" | |
55 | * (physical) page and tracking them with data structures so that | |
56 | * the raw pages can be easily reclaimed. | |
57 | * | |
58 | * A zbud page ("zbpg") is an aligned page containing a list_head, | |
59 | * a lock, and two "zbud headers". The remainder of the physical | |
60 | * page is divided up into aligned 64-byte "chunks" which contain | |
61 | * the compressed data for zero, one, or two zbuds. Each zbpg | |
62 | * resides on: (1) an "unused list" if it has no zbuds; (2) a | |
63 | * "buddied" list if it is fully populated with two zbuds; or | |
64 | * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks | |
65 | * the one unbuddied zbud uses. The data inside a zbpg cannot be | |
66 | * read or written unless the zbpg's lock is held. | |
67 | */ | |
68 | ||
69 | #define ZBH_SENTINEL 0x43214321 | |
70 | #define ZBPG_SENTINEL 0xdeadbeef | |
71 | ||
72 | #define ZBUD_MAX_BUDS 2 | |
73 | ||
74 | struct zbud_hdr { | |
75 | uint32_t pool_id; | |
76 | struct tmem_oid oid; | |
77 | uint32_t index; | |
78 | uint16_t size; /* compressed size in bytes, zero means unused */ | |
79 | DECL_SENTINEL | |
80 | }; | |
81 | ||
82 | struct zbud_page { | |
83 | struct list_head bud_list; | |
84 | spinlock_t lock; | |
85 | struct zbud_hdr buddy[ZBUD_MAX_BUDS]; | |
86 | DECL_SENTINEL | |
87 | /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */ | |
88 | }; | |
89 | ||
90 | #define CHUNK_SHIFT 6 | |
91 | #define CHUNK_SIZE (1 << CHUNK_SHIFT) | |
92 | #define CHUNK_MASK (~(CHUNK_SIZE-1)) | |
93 | #define NCHUNKS (((PAGE_SIZE - sizeof(struct zbud_page)) & \ | |
94 | CHUNK_MASK) >> CHUNK_SHIFT) | |
95 | #define MAX_CHUNK (NCHUNKS-1) | |
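/*
 * Worked example (a sketch, assuming 4KB pages): CHUNK_SIZE is 64 bytes,
 * so a page that compresses to, say, 1000 bytes needs
 * zbud_size_to_chunks(1000) == (1000 + 63) >> 6 == 16 chunks and, while it
 * has no buddy, its zbpg sits on zbud_unbuddied[16].  With a
 * struct zbud_page header of roughly 100 bytes (the exact size depends on
 * the architecture and on DECL_SENTINEL), NCHUNKS works out to 62, so
 * MAX_CHUNK is 61 and zbud_max_buddy_size() is 61 * 64 == 3904 bytes.
 */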
96 | ||
97 | static struct { | |
98 | struct list_head list; | |
99 | unsigned count; | |
100 | } zbud_unbuddied[NCHUNKS]; | |
101 | /* list N contains pages with N chunks USED and NCHUNKS-N unused */ | |
102 | /* element 0 is never used but optimizing that isn't worth it */ | |
103 | static unsigned long zbud_cumul_chunk_counts[NCHUNKS]; | |
104 | ||
105 | struct list_head zbud_buddied_list; | |
106 | static unsigned long zcache_zbud_buddied_count; | |
107 | ||
108 | /* protects the buddied list and all unbuddied lists */ | |
109 | static DEFINE_SPINLOCK(zbud_budlists_spinlock); | |
110 | ||
111 | static LIST_HEAD(zbpg_unused_list); | |
112 | static unsigned long zcache_zbpg_unused_list_count; | |
113 | ||
114 | /* protects the unused page list */ | |
115 | static DEFINE_SPINLOCK(zbpg_unused_list_spinlock); | |
116 | ||
117 | static atomic_t zcache_zbud_curr_raw_pages; | |
118 | static atomic_t zcache_zbud_curr_zpages; | |
119 | static unsigned long zcache_zbud_curr_zbytes; | |
120 | static unsigned long zcache_zbud_cumul_zpages; | |
121 | static unsigned long zcache_zbud_cumul_zbytes; | |
122 | static unsigned long zcache_compress_poor; | |
123 | ||
124 | /* forward references */ | |
125 | static void *zcache_get_free_page(void); | |
126 | static void zcache_free_page(void *p); | |
127 | ||
128 | /* | |
129 | * zbud helper functions | |
130 | */ | |
131 | ||
132 | static inline unsigned zbud_max_buddy_size(void) | |
133 | { | |
134 | return MAX_CHUNK << CHUNK_SHIFT; | |
135 | } | |
136 | ||
137 | static inline unsigned zbud_size_to_chunks(unsigned size) | |
138 | { | |
139 | BUG_ON(size == 0 || size > zbud_max_buddy_size()); | |
140 | return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; | |
141 | } | |
142 | ||
143 | static inline int zbud_budnum(struct zbud_hdr *zh) | |
144 | { | |
145 | unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1); | |
146 | struct zbud_page *zbpg = NULL; | |
147 | unsigned budnum = -1U; | |
148 | int i; | |
149 | ||
150 | for (i = 0; i < ZBUD_MAX_BUDS; i++) | |
151 | if (offset == offsetof(typeof(*zbpg), buddy[i])) { | |
152 | budnum = i; | |
153 | break; | |
154 | } | |
155 | BUG_ON(budnum == -1U); | |
156 | return budnum; | |
157 | } | |
158 | ||
159 | static char *zbud_data(struct zbud_hdr *zh, unsigned size) | |
160 | { | |
161 | struct zbud_page *zbpg; | |
162 | char *p; | |
163 | unsigned budnum; | |
164 | ||
165 | ASSERT_SENTINEL(zh, ZBH); | |
166 | budnum = zbud_budnum(zh); | |
167 | BUG_ON(size == 0 || size > zbud_max_buddy_size()); | |
168 | zbpg = container_of(zh, struct zbud_page, buddy[budnum]); | |
169 | ASSERT_SPINLOCK(&zbpg->lock); | |
170 | p = (char *)zbpg; | |
171 | if (budnum == 0) | |
172 | p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) & | |
173 | CHUNK_MASK); | |
174 | else if (budnum == 1) | |
175 | p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); | |
176 | return p; | |
177 | } | |
178 | ||
179 | /* | |
180 | * zbud raw page management | |
181 | */ | |
182 | ||
183 | static struct zbud_page *zbud_alloc_raw_page(void) | |
184 | { | |
185 | struct zbud_page *zbpg = NULL; | |
186 | struct zbud_hdr *zh0, *zh1; | |
187 | bool recycled = false; | |
188 | ||
189 | /* if any pages on the zbpg list, use one */ | |
190 | spin_lock(&zbpg_unused_list_spinlock); | |
191 | if (!list_empty(&zbpg_unused_list)) { | |
192 | zbpg = list_first_entry(&zbpg_unused_list, | |
193 | struct zbud_page, bud_list); | |
194 | list_del_init(&zbpg->bud_list); | |
195 | zcache_zbpg_unused_list_count--; | |
196 | recycled = true; | |
197 | } | |
198 | spin_unlock(&zbpg_unused_list_spinlock); | |
199 | if (zbpg == NULL) | |
200 | /* none on zbpg list, try to get a kernel page */ | |
201 | zbpg = zcache_get_free_page(); | |
202 | if (likely(zbpg != NULL)) { | |
203 | INIT_LIST_HEAD(&zbpg->bud_list); | |
204 | zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; | |
205 | spin_lock_init(&zbpg->lock); | |
206 | if (recycled) { | |
207 | ASSERT_INVERTED_SENTINEL(zbpg, ZBPG); | |
208 | SET_SENTINEL(zbpg, ZBPG); | |
209 | BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); | |
210 | BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); | |
211 | } else { | |
212 | atomic_inc(&zcache_zbud_curr_raw_pages); | |
213 | INIT_LIST_HEAD(&zbpg->bud_list); | |
214 | SET_SENTINEL(zbpg, ZBPG); | |
215 | zh0->size = 0; zh1->size = 0; | |
216 | tmem_oid_set_invalid(&zh0->oid); | |
217 | tmem_oid_set_invalid(&zh1->oid); | |
218 | } | |
219 | } | |
220 | return zbpg; | |
221 | } | |
222 | ||
223 | static void zbud_free_raw_page(struct zbud_page *zbpg) | |
224 | { | |
225 | struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1]; | |
226 | ||
227 | ASSERT_SENTINEL(zbpg, ZBPG); | |
228 | BUG_ON(!list_empty(&zbpg->bud_list)); | |
229 | ASSERT_SPINLOCK(&zbpg->lock); | |
230 | BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); | |
231 | BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); | |
232 | INVERT_SENTINEL(zbpg, ZBPG); | |
233 | spin_unlock(&zbpg->lock); | |
234 | spin_lock(&zbpg_unused_list_spinlock); | |
235 | list_add(&zbpg->bud_list, &zbpg_unused_list); | |
236 | zcache_zbpg_unused_list_count++; | |
237 | spin_unlock(&zbpg_unused_list_spinlock); | |
238 | } | |
239 | ||
240 | /* | |
241 | * core zbud handling routines | |
242 | */ | |
243 | ||
244 | static unsigned zbud_free(struct zbud_hdr *zh) | |
245 | { | |
246 | unsigned size; | |
247 | ||
248 | ASSERT_SENTINEL(zh, ZBH); | |
249 | BUG_ON(!tmem_oid_valid(&zh->oid)); | |
250 | size = zh->size; | |
251 | BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); | |
252 | zh->size = 0; | |
253 | tmem_oid_set_invalid(&zh->oid); | |
254 | INVERT_SENTINEL(zh, ZBH); | |
255 | zcache_zbud_curr_zbytes -= size; | |
256 | atomic_dec(&zcache_zbud_curr_zpages); | |
257 | return size; | |
258 | } | |
259 | ||
260 | static void zbud_free_and_delist(struct zbud_hdr *zh) | |
261 | { | |
262 | unsigned chunks; | |
263 | struct zbud_hdr *zh_other; | |
264 | unsigned budnum = zbud_budnum(zh), size; | |
265 | struct zbud_page *zbpg = | |
266 | container_of(zh, struct zbud_page, buddy[budnum]); | |
267 | ||
268 | spin_lock(&zbpg->lock); | |
269 | if (list_empty(&zbpg->bud_list)) { | |
270 | /* ignore zombie page... see zbud_evict_pages() */ | |
271 | spin_unlock(&zbpg->lock); | |
272 | return; | |
273 | } | |
274 | size = zbud_free(zh); | |
275 | ASSERT_SPINLOCK(&zbpg->lock); | |
276 | zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0]; | |
277 | if (zh_other->size == 0) { /* was unbuddied: unlist and free */ | |
278 | chunks = zbud_size_to_chunks(size); | |
279 | spin_lock(&zbud_budlists_spinlock); | |
280 | BUG_ON(list_empty(&zbud_unbuddied[chunks].list)); | |
281 | list_del_init(&zbpg->bud_list); | |
282 | zbud_unbuddied[chunks].count--; | |
283 | spin_unlock(&zbud_budlists_spinlock); | |
284 | zbud_free_raw_page(zbpg); | |
285 | } else { /* was buddied: move remaining buddy to unbuddied list */ | |
286 | chunks = zbud_size_to_chunks(zh_other->size); | |
287 | spin_lock(&zbud_budlists_spinlock); | |
288 | list_del_init(&zbpg->bud_list); | |
289 | zcache_zbud_buddied_count--; | |
290 | list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list); | |
291 | zbud_unbuddied[chunks].count++; | |
292 | spin_unlock(&zbud_budlists_spinlock); | |
293 | spin_unlock(&zbpg->lock); | |
294 | } | |
295 | } | |
296 | ||
297 | static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid, | |
298 | uint32_t index, struct page *page, | |
299 | void *cdata, unsigned size) | |
300 | { | |
301 | struct zbud_hdr *zh0, *zh1, *zh = NULL; | |
302 | struct zbud_page *zbpg = NULL, *ztmp; | |
303 | unsigned nchunks; | |
304 | char *to; | |
305 | int i, found_good_buddy = 0; | |
306 | ||
307 | nchunks = zbud_size_to_chunks(size); | |
308 | for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) { | |
309 | spin_lock(&zbud_budlists_spinlock); | |
310 | if (!list_empty(&zbud_unbuddied[i].list)) { | |
311 | list_for_each_entry_safe(zbpg, ztmp, | |
312 | &zbud_unbuddied[i].list, bud_list) { | |
313 | if (spin_trylock(&zbpg->lock)) { | |
314 | found_good_buddy = i; | |
315 | goto found_unbuddied; | |
316 | } | |
317 | } | |
318 | } | |
319 | spin_unlock(&zbud_budlists_spinlock); | |
320 | } | |
321 | /* didn't find a good buddy, try allocating a new page */ | |
322 | zbpg = zbud_alloc_raw_page(); | |
323 | if (unlikely(zbpg == NULL)) | |
324 | goto out; | |
325 | /* ok, have a page; take the locks and put it on the right unbuddied list */ | |
326 | spin_lock(&zbpg->lock); | |
327 | spin_lock(&zbud_budlists_spinlock); | |
328 | list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list); | |
329 | zbud_unbuddied[nchunks].count++; | |
330 | zh = &zbpg->buddy[0]; | |
331 | goto init_zh; | |
332 | ||
333 | found_unbuddied: | |
334 | ASSERT_SPINLOCK(&zbpg->lock); | |
335 | zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; | |
336 | BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0))); | |
337 | if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */ | |
338 | ASSERT_SENTINEL(zh0, ZBH); | |
339 | zh = zh1; | |
340 | } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */ | |
341 | ASSERT_SENTINEL(zh1, ZBH); | |
342 | zh = zh0; | |
343 | } else | |
344 | BUG(); | |
345 | list_del_init(&zbpg->bud_list); | |
346 | zbud_unbuddied[found_good_buddy].count--; | |
347 | list_add_tail(&zbpg->bud_list, &zbud_buddied_list); | |
348 | zcache_zbud_buddied_count++; | |
349 | ||
350 | init_zh: | |
351 | SET_SENTINEL(zh, ZBH); | |
352 | zh->size = size; | |
353 | zh->index = index; | |
354 | zh->oid = *oid; | |
355 | zh->pool_id = pool_id; | |
356 | /* can wait to copy the data until the list locks are dropped */ | |
357 | spin_unlock(&zbud_budlists_spinlock); | |
358 | ||
359 | to = zbud_data(zh, size); | |
360 | memcpy(to, cdata, size); | |
361 | spin_unlock(&zbpg->lock); | |
362 | zbud_cumul_chunk_counts[nchunks]++; | |
363 | atomic_inc(&zcache_zbud_curr_zpages); | |
364 | zcache_zbud_cumul_zpages++; | |
365 | zcache_zbud_curr_zbytes += size; | |
366 | zcache_zbud_cumul_zbytes += size; | |
367 | out: | |
368 | return zh; | |
369 | } | |
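/*
 * Illustration (assuming 4KB pages, where MAX_CHUNK works out to 61): a
 * new zbud needing 16 chunks scans zbud_unbuddied[46] down to
 * zbud_unbuddied[1], i.e. tightest fit first, and only falls back to
 * zbud_alloc_raw_page() when no partially filled zbpg can be trylocked.
 */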
370 | ||
371 | static int zbud_decompress(struct page *page, struct zbud_hdr *zh) | |
372 | { | |
373 | struct zbud_page *zbpg; | |
374 | unsigned budnum = zbud_budnum(zh); | |
375 | size_t out_len = PAGE_SIZE; | |
376 | char *to_va, *from_va; | |
377 | unsigned size; | |
378 | int ret = 0; | |
379 | ||
380 | zbpg = container_of(zh, struct zbud_page, buddy[budnum]); | |
381 | spin_lock(&zbpg->lock); | |
382 | if (list_empty(&zbpg->bud_list)) { | |
383 | /* ignore zombie page... see zbud_evict_pages() */ | |
384 | ret = -EINVAL; | |
385 | goto out; | |
386 | } | |
387 | ASSERT_SENTINEL(zh, ZBH); | |
388 | BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); | |
389 | to_va = kmap_atomic(page, KM_USER0); | |
390 | size = zh->size; | |
391 | from_va = zbud_data(zh, size); | |
392 | ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len); | |
393 | BUG_ON(ret != LZO_E_OK); | |
394 | BUG_ON(out_len != PAGE_SIZE); | |
395 | kunmap_atomic(to_va, KM_USER0); | |
396 | out: | |
397 | spin_unlock(&zbpg->lock); | |
398 | return ret; | |
399 | } | |
400 | ||
401 | /* | |
402 | * The following routines handle shrinking of ephemeral pages by evicting | |
403 | * pages "least valuable" first. | |
404 | */ | |
405 | ||
406 | static unsigned long zcache_evicted_raw_pages; | |
407 | static unsigned long zcache_evicted_buddied_pages; | |
408 | static unsigned long zcache_evicted_unbuddied_pages; | |
409 | ||
410 | static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid); | |
411 | static void zcache_put_pool(struct tmem_pool *pool); | |
412 | ||
413 | /* | |
414 | * Flush and free all zbuds in a zbpg, then free the pageframe | |
415 | */ | |
416 | static void zbud_evict_zbpg(struct zbud_page *zbpg) | |
417 | { | |
418 | struct zbud_hdr *zh; | |
419 | int i, j; | |
420 | uint32_t pool_id[ZBUD_MAX_BUDS], index[ZBUD_MAX_BUDS]; | |
421 | struct tmem_oid oid[ZBUD_MAX_BUDS]; | |
422 | struct tmem_pool *pool; | |
423 | ||
424 | ASSERT_SPINLOCK(&zbpg->lock); | |
425 | BUG_ON(!list_empty(&zbpg->bud_list)); | |
426 | for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) { | |
427 | zh = &zbpg->buddy[i]; | |
428 | if (zh->size) { | |
429 | pool_id[j] = zh->pool_id; | |
430 | oid[j] = zh->oid; | |
431 | index[j] = zh->index; | |
432 | j++; | |
433 | zbud_free(zh); | |
434 | } | |
435 | } | |
436 | spin_unlock(&zbpg->lock); | |
437 | for (i = 0; i < j; i++) { | |
438 | pool = zcache_get_pool_by_id(pool_id[i]); | |
439 | if (pool != NULL) { | |
440 | tmem_flush_page(pool, &oid[i], index[i]); | |
441 | zcache_put_pool(pool); | |
442 | } | |
443 | } | |
444 | ASSERT_SENTINEL(zbpg, ZBPG); | |
445 | spin_lock(&zbpg->lock); | |
446 | zbud_free_raw_page(zbpg); | |
447 | } | |
448 | ||
449 | /* | |
450 | * Free nr pages. This code is funky because we want to hold the locks | |
451 | * protecting various lists for as short a time as possible, and in some | |
452 | * circumstances the list may change asynchronously when the list lock is | |
453 | * not held. In some cases we also trylock not only to avoid waiting on a | |
454 | * page in use by another cpu, but also to avoid potential deadlock due to | |
455 | * lock inversion. | |
456 | */ | |
457 | static void zbud_evict_pages(int nr) | |
458 | { | |
459 | struct zbud_page *zbpg; | |
460 | int i; | |
461 | ||
462 | /* first try freeing any pages on unused list */ | |
463 | retry_unused_list: | |
464 | spin_lock_bh(&zbpg_unused_list_spinlock); | |
465 | if (!list_empty(&zbpg_unused_list)) { | |
466 | /* can't walk list here, since it may change when unlocked */ | |
467 | zbpg = list_first_entry(&zbpg_unused_list, | |
468 | struct zbud_page, bud_list); | |
469 | list_del_init(&zbpg->bud_list); | |
470 | zcache_zbpg_unused_list_count--; | |
471 | atomic_dec(&zcache_zbud_curr_raw_pages); | |
472 | spin_unlock_bh(&zbpg_unused_list_spinlock); | |
473 | zcache_free_page(zbpg); | |
474 | zcache_evicted_raw_pages++; | |
475 | if (--nr <= 0) | |
476 | goto out; | |
477 | goto retry_unused_list; | |
478 | } | |
479 | spin_unlock_bh(&zbpg_unused_list_spinlock); | |
480 | ||
481 | /* now try freeing unbuddied pages, starting with least space avail */ | |
482 | for (i = 0; i < MAX_CHUNK; i++) { | |
483 | retry_unbud_list_i: | |
484 | spin_lock_bh(&zbud_budlists_spinlock); | |
485 | if (list_empty(&zbud_unbuddied[i].list)) { | |
486 | spin_unlock_bh(&zbud_budlists_spinlock); | |
487 | continue; | |
488 | } | |
489 | list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) { | |
490 | if (unlikely(!spin_trylock(&zbpg->lock))) | |
491 | continue; | |
492 | list_del_init(&zbpg->bud_list); | |
493 | zbud_unbuddied[i].count--; | |
494 | spin_unlock(&zbud_budlists_spinlock); | |
495 | zcache_evicted_unbuddied_pages++; | |
496 | /* want budlists unlocked when doing zbpg eviction */ | |
497 | zbud_evict_zbpg(zbpg); | |
498 | local_bh_enable(); | |
499 | if (--nr <= 0) | |
500 | goto out; | |
501 | goto retry_unbud_list_i; | |
502 | } | |
503 | spin_unlock_bh(&zbud_budlists_spinlock); | |
504 | } | |
505 | ||
506 | /* as a last resort, free buddied pages */ | |
507 | retry_bud_list: | |
508 | spin_lock_bh(&zbud_budlists_spinlock); | |
509 | if (list_empty(&zbud_buddied_list)) { | |
510 | spin_unlock_bh(&zbud_budlists_spinlock); | |
511 | goto out; | |
512 | } | |
513 | list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) { | |
514 | if (unlikely(!spin_trylock(&zbpg->lock))) | |
515 | continue; | |
516 | list_del_init(&zbpg->bud_list); | |
517 | zcache_zbud_buddied_count--; | |
518 | spin_unlock(&zbud_budlists_spinlock); | |
519 | zcache_evicted_buddied_pages++; | |
520 | /* want budlists unlocked when doing zbpg eviction */ | |
521 | zbud_evict_zbpg(zbpg); | |
522 | local_bh_enable(); | |
523 | if (--nr <= 0) | |
524 | goto out; | |
525 | goto retry_bud_list; | |
526 | } | |
527 | spin_unlock_bh(&zbud_budlists_spinlock); | |
528 | out: | |
529 | return; | |
530 | } | |
531 | ||
532 | static void zbud_init(void) | |
533 | { | |
534 | int i; | |
535 | ||
536 | INIT_LIST_HEAD(&zbud_buddied_list); | |
537 | zcache_zbud_buddied_count = 0; | |
538 | for (i = 0; i < NCHUNKS; i++) { | |
539 | INIT_LIST_HEAD(&zbud_unbuddied[i].list); | |
540 | zbud_unbuddied[i].count = 0; | |
541 | } | |
542 | } | |
543 | ||
544 | #ifdef CONFIG_SYSFS | |
545 | /* | |
546 | * These sysfs routines show a nice distribution of how many zbpgs are | |
547 | * currently (and have ever been placed) in each unbuddied list. It's fun | |
548 | * to watch but can probably go away before final merge. | |
549 | */ | |
550 | static int zbud_show_unbuddied_list_counts(char *buf) | |
551 | { | |
552 | int i; | |
553 | char *p = buf; | |
554 | ||
555 | for (i = 0; i < NCHUNKS - 1; i++) | |
556 | p += sprintf(p, "%u ", zbud_unbuddied[i].count); | |
557 | p += sprintf(p, "%d\n", zbud_unbuddied[i].count); | |
558 | return p - buf; | |
559 | } | |
560 | ||
561 | static int zbud_show_cumul_chunk_counts(char *buf) | |
562 | { | |
563 | unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0; | |
564 | unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0; | |
565 | unsigned long total_chunks_lte_42 = 0; | |
566 | char *p = buf; | |
567 | ||
568 | for (i = 0; i < NCHUNKS; i++) { | |
569 | p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]); | |
570 | chunks += zbud_cumul_chunk_counts[i]; | |
571 | total_chunks += zbud_cumul_chunk_counts[i]; | |
572 | sum_total_chunks += i * zbud_cumul_chunk_counts[i]; | |
573 | if (i == 21) | |
574 | total_chunks_lte_21 = total_chunks; | |
575 | if (i == 32) | |
576 | total_chunks_lte_32 = total_chunks; | |
577 | if (i == 42) | |
578 | total_chunks_lte_42 = total_chunks; | |
579 | } | |
580 | p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n", | |
581 | total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42, | |
582 | chunks == 0 ? 0 : sum_total_chunks / chunks); | |
583 | return p - buf; | |
584 | } | |
585 | #endif | |
586 | ||
587 | /********** | |
588 | * This "zv" PAM implementation combines the TLSF-based xvMalloc | |
589 | * with lzo1x compression to maximize the amount of data that can | |
590 | * be packed into a physical page. | |
591 | * | |
592 | * Zv represents a PAM page with the index and object (plus a "size" value | |
593 | * necessary for decompression) immediately preceding the compressed data. | |
594 | */ | |
595 | ||
596 | #define ZVH_SENTINEL 0x43214321 | |
597 | ||
598 | struct zv_hdr { | |
599 | uint32_t pool_id; | |
600 | struct tmem_oid oid; | |
601 | uint32_t index; | |
602 | DECL_SENTINEL | |
603 | }; | |
604 | ||
605 | static const int zv_max_page_size = (PAGE_SIZE / 8) * 7; | |
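/*
 * In other words, with 4KB pages a persistent (frontswap) page is only
 * kept if it compresses to at most (4096 / 8) * 7 == 3584 bytes; a page
 * that compresses worse than that is counted in zcache_compress_poor
 * and rejected (see zcache_pampd_create() below).
 */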
606 | ||
607 | static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id, | |
608 | struct tmem_oid *oid, uint32_t index, | |
609 | void *cdata, unsigned clen) | |
610 | { | |
611 | struct page *page; | |
612 | struct zv_hdr *zv = NULL; | |
613 | uint32_t offset; | |
614 | int ret; | |
615 | ||
616 | BUG_ON(!irqs_disabled()); | |
617 | ret = xv_malloc(xvpool, clen + sizeof(struct zv_hdr), | |
618 | &page, &offset, ZCACHE_GFP_MASK); | |
619 | if (unlikely(ret)) | |
620 | goto out; | |
621 | zv = kmap_atomic(page, KM_USER0) + offset; | |
622 | zv->index = index; | |
623 | zv->oid = *oid; | |
624 | zv->pool_id = pool_id; | |
625 | SET_SENTINEL(zv, ZVH); | |
626 | memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen); | |
627 | kunmap_atomic(zv, KM_USER0); | |
628 | out: | |
629 | return zv; | |
630 | } | |
631 | ||
632 | static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv) | |
633 | { | |
634 | unsigned long flags; | |
635 | struct page *page; | |
636 | uint32_t offset; | |
637 | uint16_t size; | |
638 | ||
639 | ASSERT_SENTINEL(zv, ZVH); | |
640 | size = xv_get_object_size(zv) - sizeof(*zv); | |
641 | BUG_ON(size == 0 || size > zv_max_page_size); | |
642 | INVERT_SENTINEL(zv, ZVH); | |
643 | page = virt_to_page(zv); | |
644 | offset = (unsigned long)zv & ~PAGE_MASK; | |
645 | local_irq_save(flags); | |
646 | xv_free(xvpool, page, offset); | |
647 | local_irq_restore(flags); | |
648 | } | |
649 | ||
650 | static void zv_decompress(struct page *page, struct zv_hdr *zv) | |
651 | { | |
652 | size_t clen = PAGE_SIZE; | |
653 | char *to_va; | |
654 | unsigned size; | |
655 | int ret; | |
656 | ||
657 | ASSERT_SENTINEL(zv, ZVH); | |
658 | size = xv_get_object_size(zv) - sizeof(*zv); | |
659 | BUG_ON(size == 0 || size > zv_max_page_size); | |
660 | to_va = kmap_atomic(page, KM_USER0); | |
661 | ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv), | |
662 | size, to_va, &clen); | |
663 | kunmap_atomic(to_va, KM_USER0); | |
664 | BUG_ON(ret != LZO_E_OK); | |
665 | BUG_ON(clen != PAGE_SIZE); | |
666 | } | |
667 | ||
668 | /* | |
669 | * zcache core code starts here | |
670 | */ | |
671 | ||
672 | /* useful stats not collected by cleancache or frontswap */ | |
673 | static unsigned long zcache_flush_total; | |
674 | static unsigned long zcache_flush_found; | |
675 | static unsigned long zcache_flobj_total; | |
676 | static unsigned long zcache_flobj_found; | |
677 | static unsigned long zcache_failed_eph_puts; | |
678 | static unsigned long zcache_failed_pers_puts; | |
679 | ||
680 | #define MAX_POOLS_PER_CLIENT 16 | |
681 | ||
682 | static struct { | |
683 | struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT]; | |
684 | struct xv_pool *xvpool; | |
685 | } zcache_client; | |
686 | ||
687 | /* | |
688 | * Tmem operations assume the poolid implies the invoking client. | |
689 | * Zcache only has one client (the kernel itself), so translate | |
690 | * the poolid into the tmem_pool allocated for it. A KVM version | |
691 | * of zcache would have one client per guest and each client might | |
692 | * have a poolid==N. | |
693 | */ | |
694 | static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid) | |
695 | { | |
696 | struct tmem_pool *pool = NULL; | |
697 | ||
698 | if (poolid >= 0) { | |
699 | pool = zcache_client.tmem_pools[poolid]; | |
700 | if (pool != NULL) | |
701 | atomic_inc(&pool->refcount); | |
702 | } | |
703 | return pool; | |
704 | } | |
705 | ||
706 | static void zcache_put_pool(struct tmem_pool *pool) | |
707 | { | |
708 | if (pool != NULL) | |
709 | atomic_dec(&pool->refcount); | |
710 | } | |
711 | ||
712 | /* counters for debugging */ | |
713 | static unsigned long zcache_failed_get_free_pages; | |
714 | static unsigned long zcache_failed_alloc; | |
715 | static unsigned long zcache_put_to_flush; | |
716 | static unsigned long zcache_aborted_preload; | |
717 | static unsigned long zcache_aborted_shrink; | |
718 | ||
719 | /* | |
720 | * Ensure that memory allocation requests in zcache don't result | |
721 | * in direct reclaim requests via the shrinker, which would cause | |
722 | * an infinite loop. Maybe a GFP flag would be better? | |
723 | */ | |
724 | static DEFINE_SPINLOCK(zcache_direct_reclaim_lock); | |
725 | ||
726 | /* | |
727 | * for now, use named slabs so we can easily track usage; later we can | |
728 | * either just use kmalloc, or perhaps add a slab-like allocator | |
729 | * to more carefully manage total memory utilization | |
730 | */ | |
731 | static struct kmem_cache *zcache_objnode_cache; | |
732 | static struct kmem_cache *zcache_obj_cache; | |
733 | static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0); | |
734 | static unsigned long zcache_curr_obj_count_max; | |
735 | static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0); | |
736 | static unsigned long zcache_curr_objnode_count_max; | |
737 | ||
738 | /* | |
739 | * to avoid memory allocation recursion (e.g. due to direct reclaim), we | |
740 | * preload all necessary data structures so the hostops callbacks never | |
741 | * actually do a malloc | |
742 | */ | |
743 | struct zcache_preload { | |
744 | void *page; | |
745 | struct tmem_obj *obj; | |
746 | int nr; | |
747 | struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH]; | |
748 | }; | |
749 | static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, }; | |
750 | ||
751 | static int zcache_do_preload(struct tmem_pool *pool) | |
752 | { | |
753 | struct zcache_preload *kp; | |
754 | struct tmem_objnode *objnode; | |
755 | struct tmem_obj *obj; | |
756 | void *page; | |
757 | int ret = -ENOMEM; | |
758 | ||
759 | if (unlikely(zcache_objnode_cache == NULL)) | |
760 | goto out; | |
761 | if (unlikely(zcache_obj_cache == NULL)) | |
762 | goto out; | |
763 | if (!spin_trylock(&zcache_direct_reclaim_lock)) { | |
764 | zcache_aborted_preload++; | |
765 | goto out; | |
766 | } | |
767 | preempt_disable(); | |
768 | kp = &__get_cpu_var(zcache_preloads); | |
769 | while (kp->nr < ARRAY_SIZE(kp->objnodes)) { | |
770 | preempt_enable_no_resched(); | |
771 | objnode = kmem_cache_alloc(zcache_objnode_cache, | |
772 | ZCACHE_GFP_MASK); | |
773 | if (unlikely(objnode == NULL)) { | |
774 | zcache_failed_alloc++; | |
775 | goto unlock_out; | |
776 | } | |
777 | preempt_disable(); | |
778 | kp = &__get_cpu_var(zcache_preloads); | |
779 | if (kp->nr < ARRAY_SIZE(kp->objnodes)) | |
780 | kp->objnodes[kp->nr++] = objnode; | |
781 | else | |
782 | kmem_cache_free(zcache_objnode_cache, objnode); | |
783 | } | |
784 | preempt_enable_no_resched(); | |
785 | obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK); | |
786 | if (unlikely(obj == NULL)) { | |
787 | zcache_failed_alloc++; | |
788 | goto unlock_out; | |
789 | } | |
790 | page = (void *)__get_free_page(ZCACHE_GFP_MASK); | |
791 | if (unlikely(page == NULL)) { | |
792 | zcache_failed_get_free_pages++; | |
793 | kmem_cache_free(zcache_obj_cache, obj); | |
794 | goto unlock_out; |
795 | } | |
796 | preempt_disable(); | |
797 | kp = &__get_cpu_var(zcache_preloads); | |
798 | if (kp->obj == NULL) | |
799 | kp->obj = obj; | |
800 | else | |
801 | kmem_cache_free(zcache_obj_cache, obj); | |
802 | if (kp->page == NULL) | |
803 | kp->page = page; | |
804 | else | |
805 | free_page((unsigned long)page); | |
806 | ret = 0; | |
807 | unlock_out: | |
808 | spin_unlock(&zcache_direct_reclaim_lock); | |
809 | out: | |
810 | return ret; | |
811 | } | |
812 | ||
813 | static void *zcache_get_free_page(void) | |
814 | { | |
815 | struct zcache_preload *kp; | |
816 | void *page; | |
817 | ||
818 | kp = &__get_cpu_var(zcache_preloads); | |
819 | page = kp->page; | |
820 | BUG_ON(page == NULL); | |
821 | kp->page = NULL; | |
822 | return page; | |
823 | } | |
824 | ||
825 | static void zcache_free_page(void *p) | |
826 | { | |
827 | free_page((unsigned long)p); | |
828 | } | |
829 | ||
830 | /* | |
831 | * zcache implementation for tmem host ops | |
832 | */ | |
833 | ||
834 | static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool) | |
835 | { | |
836 | struct tmem_objnode *objnode = NULL; | |
837 | unsigned long count; | |
838 | struct zcache_preload *kp; | |
839 | ||
840 | kp = &__get_cpu_var(zcache_preloads); | |
841 | if (kp->nr <= 0) | |
842 | goto out; | |
843 | objnode = kp->objnodes[kp->nr - 1]; | |
844 | BUG_ON(objnode == NULL); | |
845 | kp->objnodes[kp->nr - 1] = NULL; | |
846 | kp->nr--; | |
847 | count = atomic_inc_return(&zcache_curr_objnode_count); | |
848 | if (count > zcache_curr_objnode_count_max) | |
849 | zcache_curr_objnode_count_max = count; | |
850 | out: | |
851 | return objnode; | |
852 | } | |
853 | ||
854 | static void zcache_objnode_free(struct tmem_objnode *objnode, | |
855 | struct tmem_pool *pool) | |
856 | { | |
857 | atomic_dec(&zcache_curr_objnode_count); | |
858 | BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0); | |
859 | kmem_cache_free(zcache_objnode_cache, objnode); | |
860 | } | |
861 | ||
862 | static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool) | |
863 | { | |
864 | struct tmem_obj *obj = NULL; | |
865 | unsigned long count; | |
866 | struct zcache_preload *kp; | |
867 | ||
868 | kp = &__get_cpu_var(zcache_preloads); | |
869 | obj = kp->obj; | |
870 | BUG_ON(obj == NULL); | |
871 | kp->obj = NULL; | |
872 | count = atomic_inc_return(&zcache_curr_obj_count); | |
873 | if (count > zcache_curr_obj_count_max) | |
874 | zcache_curr_obj_count_max = count; | |
875 | return obj; | |
876 | } | |
877 | ||
878 | static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool) | |
879 | { | |
880 | atomic_dec(&zcache_curr_obj_count); | |
881 | BUG_ON(atomic_read(&zcache_curr_obj_count) < 0); | |
882 | kmem_cache_free(zcache_obj_cache, obj); | |
883 | } | |
884 | ||
885 | static struct tmem_hostops zcache_hostops = { | |
886 | .obj_alloc = zcache_obj_alloc, | |
887 | .obj_free = zcache_obj_free, | |
888 | .objnode_alloc = zcache_objnode_alloc, | |
889 | .objnode_free = zcache_objnode_free, | |
890 | }; | |
891 | ||
892 | /* | |
893 | * zcache implementations for PAM page descriptor ops | |
894 | */ | |
895 | ||
896 | static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0); | |
897 | static unsigned long zcache_curr_eph_pampd_count_max; | |
898 | static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0); | |
899 | static unsigned long zcache_curr_pers_pampd_count_max; | |
900 | ||
901 | /* forward reference */ | |
902 | static int zcache_compress(struct page *from, void **out_va, size_t *out_len); | |
903 | ||
904 | static void *zcache_pampd_create(struct tmem_pool *pool, struct tmem_oid *oid, | |
905 | uint32_t index, struct page *page) | |
906 | { | |
907 | void *pampd = NULL, *cdata; | |
908 | size_t clen; | |
909 | int ret; | |
910 | bool ephemeral = is_ephemeral(pool); | |
911 | unsigned long count; | |
912 | ||
913 | if (ephemeral) { | |
914 | ret = zcache_compress(page, &cdata, &clen); | |
915 | if (ret == 0) | |
916 | goto out; | |
917 | ||
918 | if (clen == 0 || clen > zbud_max_buddy_size()) { | |
919 | zcache_compress_poor++; | |
920 | goto out; | |
921 | } | |
922 | pampd = (void *)zbud_create(pool->pool_id, oid, index, | |
923 | page, cdata, clen); | |
924 | if (pampd != NULL) { | |
925 | count = atomic_inc_return(&zcache_curr_eph_pampd_count); | |
926 | if (count > zcache_curr_eph_pampd_count_max) | |
927 | zcache_curr_eph_pampd_count_max = count; | |
928 | } | |
929 | } else { | |
930 | /* | |
931 | * FIXME: This is all the "policy" there is for now. | |
932 | * 3/4 totpages should allow ~37% of RAM to be filled with | |
933 | * compressed frontswap pages | |
934 | */ | |
935 | if (atomic_read(&zcache_curr_pers_pampd_count) > | |
936 | 3 * totalram_pages / 4) | |
937 | goto out; | |
938 | ret = zcache_compress(page, &cdata, &clen); | |
939 | if (ret == 0) | |
940 | goto out; | |
941 | if (clen > zv_max_page_size) { | |
942 | zcache_compress_poor++; | |
943 | goto out; | |
944 | } | |
945 | pampd = (void *)zv_create(zcache_client.xvpool, pool->pool_id, | |
946 | oid, index, cdata, clen); | |
947 | if (pampd == NULL) | |
948 | goto out; | |
949 | count = atomic_inc_return(&zcache_curr_pers_pampd_count); | |
950 | if (count > zcache_curr_pers_pampd_count_max) | |
951 | zcache_curr_pers_pampd_count_max = count; | |
952 | } | |
953 | out: | |
954 | return pampd; | |
955 | } | |
956 | ||
957 | /* | |
958 | * fill the pageframe corresponding to the struct page with the data | |
959 | * from the passed pampd | |
960 | */ | |
961 | static int zcache_pampd_get_data(struct page *page, void *pampd, | |
962 | struct tmem_pool *pool) | |
963 | { | |
964 | int ret = 0; | |
965 | ||
966 | if (is_ephemeral(pool)) | |
967 | ret = zbud_decompress(page, pampd); | |
968 | else | |
969 | zv_decompress(page, pampd); | |
970 | return ret; | |
971 | } | |
972 | ||
973 | /* | |
974 | * free the pampd and remove it from any zcache lists | |
975 | * pampd must no longer be pointed to from any tmem data structures! | |
976 | */ | |
977 | static void zcache_pampd_free(void *pampd, struct tmem_pool *pool) | |
978 | { | |
979 | if (is_ephemeral(pool)) { | |
980 | zbud_free_and_delist((struct zbud_hdr *)pampd); | |
981 | atomic_dec(&zcache_curr_eph_pampd_count); | |
982 | BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0); | |
983 | } else { | |
984 | zv_free(zcache_client.xvpool, (struct zv_hdr *)pampd); | |
985 | atomic_dec(&zcache_curr_pers_pampd_count); | |
986 | BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0); | |
987 | } | |
988 | } | |
989 | ||
990 | static struct tmem_pamops zcache_pamops = { | |
991 | .create = zcache_pampd_create, | |
992 | .get_data = zcache_pampd_get_data, | |
993 | .free = zcache_pampd_free, | |
994 | }; | |
995 | ||
996 | /* | |
997 | * zcache compression/decompression and related per-cpu stuff | |
998 | */ | |
999 | ||
1000 | #define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS | |
1001 | #define LZO_DSTMEM_PAGE_ORDER 1 | |
1002 | static DEFINE_PER_CPU(unsigned char *, zcache_workmem); | |
1003 | static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); | |
1004 | ||
1005 | static int zcache_compress(struct page *from, void **out_va, size_t *out_len) | |
1006 | { | |
1007 | int ret = 0; | |
1008 | unsigned char *dmem = __get_cpu_var(zcache_dstmem); | |
1009 | unsigned char *wmem = __get_cpu_var(zcache_workmem); | |
1010 | char *from_va; | |
1011 | ||
1012 | BUG_ON(!irqs_disabled()); | |
1013 | if (unlikely(dmem == NULL || wmem == NULL)) | |
1014 | goto out; /* no buffer, so can't compress */ | |
1015 | from_va = kmap_atomic(from, KM_USER0); | |
1016 | mb(); | |
1017 | ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem); | |
1018 | BUG_ON(ret != LZO_E_OK); | |
1019 | *out_va = dmem; | |
1020 | kunmap_atomic(from_va, KM_USER0); | |
1021 | ret = 1; | |
1022 | out: | |
1023 | return ret; | |
1024 | } | |
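/*
 * Note on the return convention: zcache_compress() returns 1 on success
 * and 0 on failure (no per-cpu buffers), not the usual 0/-errno, which is
 * why zcache_pampd_create() treats ret == 0 as "bail out".
 */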
1025 | ||
1026 | ||
1027 | static int zcache_cpu_notifier(struct notifier_block *nb, | |
1028 | unsigned long action, void *pcpu) | |
1029 | { | |
1030 | int cpu = (long)pcpu; | |
1031 | struct zcache_preload *kp; | |
1032 | ||
1033 | switch (action) { | |
1034 | case CPU_UP_PREPARE: | |
1035 | per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( | |
1036 | GFP_KERNEL | __GFP_REPEAT, | |
1037 | LZO_DSTMEM_PAGE_ORDER); | |
1038 | per_cpu(zcache_workmem, cpu) = | |
1039 | kzalloc(LZO1X_MEM_COMPRESS, | |
1040 | GFP_KERNEL | __GFP_REPEAT); | |
1041 | break; | |
1042 | case CPU_DEAD: | |
1043 | case CPU_UP_CANCELED: | |
1044 | free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), | |
1045 | LZO_DSTMEM_PAGE_ORDER); | |
1046 | per_cpu(zcache_dstmem, cpu) = NULL; | |
1047 | kfree(per_cpu(zcache_workmem, cpu)); | |
1048 | per_cpu(zcache_workmem, cpu) = NULL; | |
1049 | kp = &per_cpu(zcache_preloads, cpu); | |
1050 | while (kp->nr) { | |
1051 | kmem_cache_free(zcache_objnode_cache, | |
1052 | kp->objnodes[kp->nr - 1]); | |
1053 | kp->objnodes[kp->nr - 1] = NULL; | |
1054 | kp->nr--; | |
1055 | } | |
1056 | kmem_cache_free(zcache_obj_cache, kp->obj); | |
1057 | free_page((unsigned long)kp->page); | |
1058 | break; | |
1059 | default: | |
1060 | break; | |
1061 | } | |
1062 | return NOTIFY_OK; | |
1063 | } | |
1064 | ||
1065 | static struct notifier_block zcache_cpu_notifier_block = { | |
1066 | .notifier_call = zcache_cpu_notifier | |
1067 | }; | |
1068 | ||
1069 | #ifdef CONFIG_SYSFS | |
1070 | #define ZCACHE_SYSFS_RO(_name) \ | |
1071 | static ssize_t zcache_##_name##_show(struct kobject *kobj, \ | |
1072 | struct kobj_attribute *attr, char *buf) \ | |
1073 | { \ | |
1074 | return sprintf(buf, "%lu\n", zcache_##_name); \ | |
1075 | } \ | |
1076 | static struct kobj_attribute zcache_##_name##_attr = { \ | |
1077 | .attr = { .name = __stringify(_name), .mode = 0444 }, \ | |
1078 | .show = zcache_##_name##_show, \ | |
1079 | } | |
1080 | ||
1081 | #define ZCACHE_SYSFS_RO_ATOMIC(_name) \ | |
1082 | static ssize_t zcache_##_name##_show(struct kobject *kobj, \ | |
1083 | struct kobj_attribute *attr, char *buf) \ | |
1084 | { \ | |
1085 | return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \ | |
1086 | } \ | |
1087 | static struct kobj_attribute zcache_##_name##_attr = { \ | |
1088 | .attr = { .name = __stringify(_name), .mode = 0444 }, \ | |
1089 | .show = zcache_##_name##_show, \ | |
1090 | } | |
1091 | ||
1092 | #define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \ | |
1093 | static ssize_t zcache_##_name##_show(struct kobject *kobj, \ | |
1094 | struct kobj_attribute *attr, char *buf) \ | |
1095 | { \ | |
1096 | return _func(buf); \ | |
1097 | } \ | |
1098 | static struct kobj_attribute zcache_##_name##_attr = { \ | |
1099 | .attr = { .name = __stringify(_name), .mode = 0444 }, \ | |
1100 | .show = zcache_##_name##_show, \ | |
1101 | } | |
1102 | ||
1103 | ZCACHE_SYSFS_RO(curr_obj_count_max); | |
1104 | ZCACHE_SYSFS_RO(curr_objnode_count_max); | |
1105 | ZCACHE_SYSFS_RO(flush_total); | |
1106 | ZCACHE_SYSFS_RO(flush_found); | |
1107 | ZCACHE_SYSFS_RO(flobj_total); | |
1108 | ZCACHE_SYSFS_RO(flobj_found); | |
1109 | ZCACHE_SYSFS_RO(failed_eph_puts); | |
1110 | ZCACHE_SYSFS_RO(failed_pers_puts); | |
1111 | ZCACHE_SYSFS_RO(zbud_curr_zbytes); | |
1112 | ZCACHE_SYSFS_RO(zbud_cumul_zpages); | |
1113 | ZCACHE_SYSFS_RO(zbud_cumul_zbytes); | |
1114 | ZCACHE_SYSFS_RO(zbud_buddied_count); | |
1115 | ZCACHE_SYSFS_RO(zbpg_unused_list_count); | |
1116 | ZCACHE_SYSFS_RO(evicted_raw_pages); | |
1117 | ZCACHE_SYSFS_RO(evicted_unbuddied_pages); | |
1118 | ZCACHE_SYSFS_RO(evicted_buddied_pages); | |
1119 | ZCACHE_SYSFS_RO(failed_get_free_pages); | |
1120 | ZCACHE_SYSFS_RO(failed_alloc); | |
1121 | ZCACHE_SYSFS_RO(put_to_flush); | |
1122 | ZCACHE_SYSFS_RO(aborted_preload); | |
1123 | ZCACHE_SYSFS_RO(aborted_shrink); | |
1124 | ZCACHE_SYSFS_RO(compress_poor); | |
1125 | ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages); | |
1126 | ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages); | |
1127 | ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count); | |
1128 | ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count); | |
1129 | ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts, | |
1130 | zbud_show_unbuddied_list_counts); | |
1131 | ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts, | |
1132 | zbud_show_cumul_chunk_counts); | |
1133 | ||
1134 | static struct attribute *zcache_attrs[] = { | |
1135 | &zcache_curr_obj_count_attr.attr, | |
1136 | &zcache_curr_obj_count_max_attr.attr, | |
1137 | &zcache_curr_objnode_count_attr.attr, | |
1138 | &zcache_curr_objnode_count_max_attr.attr, | |
1139 | &zcache_flush_total_attr.attr, | |
1140 | &zcache_flobj_total_attr.attr, | |
1141 | &zcache_flush_found_attr.attr, | |
1142 | &zcache_flobj_found_attr.attr, | |
1143 | &zcache_failed_eph_puts_attr.attr, | |
1144 | &zcache_failed_pers_puts_attr.attr, | |
1145 | &zcache_compress_poor_attr.attr, | |
1146 | &zcache_zbud_curr_raw_pages_attr.attr, | |
1147 | &zcache_zbud_curr_zpages_attr.attr, | |
1148 | &zcache_zbud_curr_zbytes_attr.attr, | |
1149 | &zcache_zbud_cumul_zpages_attr.attr, | |
1150 | &zcache_zbud_cumul_zbytes_attr.attr, | |
1151 | &zcache_zbud_buddied_count_attr.attr, | |
1152 | &zcache_zbpg_unused_list_count_attr.attr, | |
1153 | &zcache_evicted_raw_pages_attr.attr, | |
1154 | &zcache_evicted_unbuddied_pages_attr.attr, | |
1155 | &zcache_evicted_buddied_pages_attr.attr, | |
1156 | &zcache_failed_get_free_pages_attr.attr, | |
1157 | &zcache_failed_alloc_attr.attr, | |
1158 | &zcache_put_to_flush_attr.attr, | |
1159 | &zcache_aborted_preload_attr.attr, | |
1160 | &zcache_aborted_shrink_attr.attr, | |
1161 | &zcache_zbud_unbuddied_list_counts_attr.attr, | |
1162 | &zcache_zbud_cumul_chunk_counts_attr.attr, | |
1163 | NULL, | |
1164 | }; | |
1165 | ||
1166 | static struct attribute_group zcache_attr_group = { | |
1167 | .attrs = zcache_attrs, | |
1168 | .name = "zcache", | |
1169 | }; | |
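/*
 * The group is created under mm_kobj (see zcache_init()), so on a typical
 * build these counters appear as files such as
 * /sys/kernel/mm/zcache/zbud_curr_raw_pages and can simply be cat'd to
 * watch compression and eviction behaviour at runtime.
 */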
1170 | ||
1171 | #endif /* CONFIG_SYSFS */ | |
1172 | /* | |
1173 | * When zcache is disabled ("frozen"), pools can be created and destroyed, | |
1174 | * but all puts (and thus all other operations that require memory allocation) | |
1175 | * must fail. If zcache is unfrozen, accepts some puts, and is then frozen | |
1176 | * again, data consistency requires that puts attempted while frozen be | |
1177 | * converted into flushes. | |
1178 | */ | |
1179 | static bool zcache_freeze; | |
1180 | ||
1181 | /* | |
1182 | * zcache shrinker interface (only useful for ephemeral pages, so zbud only) | |
1183 | */ | |
1183 | */ | |
1184 | static int shrink_zcache_memory(struct shrinker *shrink, |
1185 | struct shrink_control *sc) | |
1186 | { |
1187 | int ret = -1; | |
1188 | int nr = sc->nr_to_scan; |
1189 | gfp_t gfp_mask = sc->gfp_mask; | |
1190 | |
1191 | if (nr >= 0) { | |
1192 | if (!(gfp_mask & __GFP_FS)) | |
1193 | /* does this case really need to be skipped? */ | |
1194 | goto out; | |
1195 | if (spin_trylock(&zcache_direct_reclaim_lock)) { | |
1196 | zbud_evict_pages(nr); | |
1197 | spin_unlock(&zcache_direct_reclaim_lock); | |
1198 | } else | |
1199 | zcache_aborted_shrink++; | |
1200 | } | |
1201 | ret = (int)atomic_read(&zcache_zbud_curr_raw_pages); | |
1202 | out: | |
1203 | return ret; | |
1204 | } | |
1205 | ||
1206 | static struct shrinker zcache_shrinker = { | |
1207 | .shrink = shrink_zcache_memory, | |
1208 | .seeks = DEFAULT_SEEKS, | |
1209 | }; | |
1210 | ||
1211 | /* | |
1212 | * zcache shims between cleancache/frontswap ops and tmem | |
1213 | */ | |
1214 | ||
1215 | static int zcache_put_page(int pool_id, struct tmem_oid *oidp, | |
1216 | uint32_t index, struct page *page) | |
1217 | { | |
1218 | struct tmem_pool *pool; | |
1219 | int ret = -1; | |
1220 | ||
1221 | BUG_ON(!irqs_disabled()); | |
1222 | pool = zcache_get_pool_by_id(pool_id); | |
1223 | if (unlikely(pool == NULL)) | |
1224 | goto out; | |
1225 | if (!zcache_freeze && zcache_do_preload(pool) == 0) { | |
1226 | /* preload does preempt_disable on success */ | |
1227 | ret = tmem_put(pool, oidp, index, page); | |
1228 | if (ret < 0) { | |
1229 | if (is_ephemeral(pool)) | |
1230 | zcache_failed_eph_puts++; | |
1231 | else | |
1232 | zcache_failed_pers_puts++; | |
1233 | } | |
1234 | zcache_put_pool(pool); | |
1235 | preempt_enable_no_resched(); | |
1236 | } else { | |
1237 | zcache_put_to_flush++; | |
1238 | if (atomic_read(&pool->obj_count) > 0) | |
1239 | /* the put fails whether the flush succeeds or not */ | |
1240 | (void)tmem_flush_page(pool, oidp, index); | |
1241 | zcache_put_pool(pool); | |
1242 | } | |
1243 | out: | |
1244 | return ret; | |
1245 | } | |
1246 | ||
1247 | static int zcache_get_page(int pool_id, struct tmem_oid *oidp, | |
1248 | uint32_t index, struct page *page) | |
1249 | { | |
1250 | struct tmem_pool *pool; | |
1251 | int ret = -1; | |
1252 | unsigned long flags; | |
1253 | ||
1254 | local_irq_save(flags); | |
1255 | pool = zcache_get_pool_by_id(pool_id); | |
1256 | if (likely(pool != NULL)) { | |
1257 | if (atomic_read(&pool->obj_count) > 0) | |
1258 | ret = tmem_get(pool, oidp, index, page); | |
1259 | zcache_put_pool(pool); | |
1260 | } | |
1261 | local_irq_restore(flags); | |
1262 | return ret; | |
1263 | } | |
1264 | ||
1265 | static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index) | |
1266 | { | |
1267 | struct tmem_pool *pool; | |
1268 | int ret = -1; | |
1269 | unsigned long flags; | |
1270 | ||
1271 | local_irq_save(flags); | |
1272 | zcache_flush_total++; | |
1273 | pool = zcache_get_pool_by_id(pool_id); | |
1274 | if (likely(pool != NULL)) { | |
1275 | if (atomic_read(&pool->obj_count) > 0) | |
1276 | ret = tmem_flush_page(pool, oidp, index); | |
1277 | zcache_put_pool(pool); | |
1278 | } | |
1279 | if (ret >= 0) | |
1280 | zcache_flush_found++; | |
1281 | local_irq_restore(flags); | |
1282 | return ret; | |
1283 | } | |
1284 | ||
1285 | static int zcache_flush_object(int pool_id, struct tmem_oid *oidp) | |
1286 | { | |
1287 | struct tmem_pool *pool; | |
1288 | int ret = -1; | |
1289 | unsigned long flags; | |
1290 | ||
1291 | local_irq_save(flags); | |
1292 | zcache_flobj_total++; | |
1293 | pool = zcache_get_pool_by_id(pool_id); | |
1294 | if (likely(pool != NULL)) { | |
1295 | if (atomic_read(&pool->obj_count) > 0) | |
1296 | ret = tmem_flush_object(pool, oidp); | |
1297 | zcache_put_pool(pool); | |
1298 | } | |
1299 | if (ret >= 0) | |
1300 | zcache_flobj_found++; | |
1301 | local_irq_restore(flags); | |
1302 | return ret; | |
1303 | } | |
1304 | ||
1305 | static int zcache_destroy_pool(int pool_id) | |
1306 | { | |
1307 | struct tmem_pool *pool = NULL; | |
1308 | int ret = -1; | |
1309 | ||
1310 | if (pool_id < 0) | |
1311 | goto out; | |
1312 | pool = zcache_client.tmem_pools[pool_id]; | |
1313 | if (pool == NULL) | |
1314 | goto out; | |
1315 | zcache_client.tmem_pools[pool_id] = NULL; | |
1316 | /* wait for pool activity on other cpus to quiesce */ | |
1317 | while (atomic_read(&pool->refcount) != 0) | |
1318 | ; | |
1319 | local_bh_disable(); | |
1320 | ret = tmem_destroy_pool(pool); | |
1321 | local_bh_enable(); | |
1322 | kfree(pool); | |
1323 | pr_info("zcache: destroyed pool id=%d\n", pool_id); | |
1324 | out: | |
1325 | return ret; | |
1326 | } | |
1327 | ||
1328 | static int zcache_new_pool(uint32_t flags) | |
1329 | { | |
1330 | int poolid = -1; | |
1331 | struct tmem_pool *pool; | |
1332 | ||
1333 | pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL); | |
1334 | if (pool == NULL) { | |
1335 | pr_info("zcache: pool creation failed: out of memory\n"); | |
1336 | goto out; | |
1337 | } | |
1338 | ||
1339 | for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++) | |
1340 | if (zcache_client.tmem_pools[poolid] == NULL) | |
1341 | break; | |
1342 | if (poolid >= MAX_POOLS_PER_CLIENT) { | |
1343 | pr_info("zcache: pool creation failed: max exceeded\n"); | |
1344 | kfree(pool); | |
1345 | poolid = -1; | |
1346 | goto out; | |
1347 | } | |
1348 | atomic_set(&pool->refcount, 0); | |
1349 | pool->client = &zcache_client; | |
1350 | pool->pool_id = poolid; | |
1351 | tmem_new_pool(pool, flags); | |
1352 | zcache_client.tmem_pools[poolid] = pool; | |
1353 | pr_info("zcache: created %s tmem pool, id=%d\n", | |
1354 | flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", | |
1355 | poolid); | |
1356 | out: | |
1357 | return poolid; | |
1358 | } | |
1359 | ||
1360 | /********** | |
1361 | * Two kernel functionalities currently can be layered on top of tmem. | |
1362 | * These are "cleancache" which is used as a second-chance cache for clean | |
1363 | * page cache pages; and "frontswap" which is used for swap pages | |
1364 | * to avoid writes to disk. A generic "shim" is provided here for each | |
1365 | * to translate in-kernel semantics to zcache semantics. | |
1366 | */ | |
1367 | ||
1368 | #ifdef CONFIG_CLEANCACHE | |
1369 | static void zcache_cleancache_put_page(int pool_id, | |
1370 | struct cleancache_filekey key, | |
1371 | pgoff_t index, struct page *page) | |
1372 | { | |
1373 | u32 ind = (u32) index; | |
1374 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1375 | ||
1376 | if (likely(ind == index)) | |
1377 | (void)zcache_put_page(pool_id, &oid, index, page); | |
1378 | } | |
1379 | ||
1380 | static int zcache_cleancache_get_page(int pool_id, | |
1381 | struct cleancache_filekey key, | |
1382 | pgoff_t index, struct page *page) | |
1383 | { | |
1384 | u32 ind = (u32) index; | |
1385 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1386 | int ret = -1; | |
1387 | ||
1388 | if (likely(ind == index)) | |
1389 | ret = zcache_get_page(pool_id, &oid, index, page); | |
1390 | return ret; | |
1391 | } | |
1392 | ||
1393 | static void zcache_cleancache_flush_page(int pool_id, | |
1394 | struct cleancache_filekey key, | |
1395 | pgoff_t index) | |
1396 | { | |
1397 | u32 ind = (u32) index; | |
1398 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1399 | ||
1400 | if (likely(ind == index)) | |
1401 | (void)zcache_flush_page(pool_id, &oid, ind); | |
1402 | } | |
1403 | ||
1404 | static void zcache_cleancache_flush_inode(int pool_id, | |
1405 | struct cleancache_filekey key) | |
1406 | { | |
1407 | struct tmem_oid oid = *(struct tmem_oid *)&key; | |
1408 | ||
1409 | (void)zcache_flush_object(pool_id, &oid); | |
1410 | } | |
1411 | ||
1412 | static void zcache_cleancache_flush_fs(int pool_id) | |
1413 | { | |
1414 | if (pool_id >= 0) | |
1415 | (void)zcache_destroy_pool(pool_id); | |
1416 | } | |
1417 | ||
1418 | static int zcache_cleancache_init_fs(size_t pagesize) | |
1419 | { | |
1420 | BUG_ON(sizeof(struct cleancache_filekey) != | |
1421 | sizeof(struct tmem_oid)); | |
1422 | BUG_ON(pagesize != PAGE_SIZE); | |
1423 | return zcache_new_pool(0); | |
1424 | } | |
1425 | ||
1426 | static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) | |
1427 | { | |
1428 | /* shared pools are unsupported and map to private */ | |
1429 | BUG_ON(sizeof(struct cleancache_filekey) != | |
1430 | sizeof(struct tmem_oid)); | |
1431 | BUG_ON(pagesize != PAGE_SIZE); | |
1432 | return zcache_new_pool(0); | |
1433 | } | |
1434 | ||
1435 | static struct cleancache_ops zcache_cleancache_ops = { | |
1436 | .put_page = zcache_cleancache_put_page, | |
1437 | .get_page = zcache_cleancache_get_page, | |
1438 | .flush_page = zcache_cleancache_flush_page, | |
1439 | .flush_inode = zcache_cleancache_flush_inode, | |
1440 | .flush_fs = zcache_cleancache_flush_fs, | |
1441 | .init_shared_fs = zcache_cleancache_init_shared_fs, | |
1442 | .init_fs = zcache_cleancache_init_fs | |
1443 | }; | |
1444 | ||
1445 | struct cleancache_ops zcache_cleancache_register_ops(void) | |
1446 | { | |
1447 | struct cleancache_ops old_ops = | |
1448 | cleancache_register_ops(&zcache_cleancache_ops); | |
1449 | ||
1450 | return old_ops; | |
1451 | } | |
1452 | #endif | |
1453 | ||
1454 | #ifdef CONFIG_FRONTSWAP | |
1455 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | |
1456 | static int zcache_frontswap_poolid = -1; | |
1457 | ||
1458 | /* | |
1459 | * Swizzling increases objects per swaptype, increasing tmem concurrency | |
1460 | * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS | |
1461 | */ | |
1462 | #define SWIZ_BITS 4 | |
1463 | #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) | |
1464 | #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) | |
1465 | #define iswiz(_ind) (_ind >> SWIZ_BITS) | |
1466 | ||
1467 | static inline struct tmem_oid oswiz(unsigned type, u32 ind) | |
1468 | { | |
1469 | struct tmem_oid oid = { .oid = { 0 } }; | |
1470 | oid.oid[0] = _oswiz(type, ind); | |
1471 | return oid; | |
1472 | } | |
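/*
 * Worked example: with SWIZ_BITS == 4, a frontswap put for type 1,
 * offset 0x1234 maps to oid.oid[0] == _oswiz(1, 0x1234) == (1 << 4) | 4
 * == 0x14 and tmem index == iswiz(0x1234) == 0x123, so consecutive swap
 * offsets are spread across 16 tmem objects per swap type.
 */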
1473 | ||
1474 | static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, | |
1475 | struct page *page) | |
1476 | { | |
1477 | u64 ind64 = (u64)offset; | |
1478 | u32 ind = (u32)offset; | |
1479 | struct tmem_oid oid = oswiz(type, ind); | |
1480 | int ret = -1; | |
1481 | unsigned long flags; | |
1482 | ||
1483 | BUG_ON(!PageLocked(page)); | |
1484 | if (likely(ind64 == ind)) { | |
1485 | local_irq_save(flags); | |
1486 | ret = zcache_put_page(zcache_frontswap_poolid, &oid, | |
1487 | iswiz(ind), page); | |
1488 | local_irq_restore(flags); | |
1489 | } | |
1490 | return ret; | |
1491 | } | |
1492 | ||
1493 | /* returns 0 if the page was successfully gotten from frontswap, -1 if it | |
1494 | * was not present (should never happen!) */ | |
1495 | static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, | |
1496 | struct page *page) | |
1497 | { | |
1498 | u64 ind64 = (u64)offset; | |
1499 | u32 ind = (u32)offset; | |
1500 | struct tmem_oid oid = oswiz(type, ind); | |
1501 | int ret = -1; | |
1502 | ||
1503 | BUG_ON(!PageLocked(page)); | |
1504 | if (likely(ind64 == ind)) | |
1505 | ret = zcache_get_page(zcache_frontswap_poolid, &oid, | |
1506 | iswiz(ind), page); | |
1507 | return ret; | |
1508 | } | |
1509 | ||
1510 | /* flush a single page from frontswap */ | |
1511 | static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) | |
1512 | { | |
1513 | u64 ind64 = (u64)offset; | |
1514 | u32 ind = (u32)offset; | |
1515 | struct tmem_oid oid = oswiz(type, ind); | |
1516 | ||
1517 | if (likely(ind64 == ind)) | |
1518 | (void)zcache_flush_page(zcache_frontswap_poolid, &oid, | |
1519 | iswiz(ind)); | |
1520 | } | |
1521 | ||
1522 | /* flush all pages from the passed swaptype */ | |
1523 | static void zcache_frontswap_flush_area(unsigned type) | |
1524 | { | |
1525 | struct tmem_oid oid; | |
1526 | int ind; | |
1527 | ||
1528 | for (ind = SWIZ_MASK; ind >= 0; ind--) { | |
1529 | oid = oswiz(type, ind); | |
1530 | (void)zcache_flush_object(zcache_frontswap_poolid, &oid); | |
1531 | } | |
1532 | } | |
1533 | ||
1534 | static void zcache_frontswap_init(unsigned ignored) | |
1535 | { | |
1536 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | |
1537 | if (zcache_frontswap_poolid < 0) | |
1538 | zcache_frontswap_poolid = zcache_new_pool(TMEM_POOL_PERSIST); | |
1539 | } | |
1540 | ||
1541 | static struct frontswap_ops zcache_frontswap_ops = { | |
1542 | .put_page = zcache_frontswap_put_page, | |
1543 | .get_page = zcache_frontswap_get_page, | |
1544 | .flush_page = zcache_frontswap_flush_page, | |
1545 | .flush_area = zcache_frontswap_flush_area, | |
1546 | .init = zcache_frontswap_init | |
1547 | }; | |
1548 | ||
1549 | struct frontswap_ops zcache_frontswap_register_ops(void) | |
1550 | { | |
1551 | struct frontswap_ops old_ops = | |
1552 | frontswap_register_ops(&zcache_frontswap_ops); | |
1553 | ||
1554 | return old_ops; | |
1555 | } | |
1556 | #endif | |
1557 | ||
1558 | /* | |
1559 | * zcache initialization | |
1560 | * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR | |
1561 | * NOTHING HAPPENS! | |
1562 | */ | |
1563 | ||
1564 | static int zcache_enabled; | |
1565 | ||
1566 | static int __init enable_zcache(char *s) | |
1567 | { | |
1568 | zcache_enabled = 1; | |
1569 | return 1; | |
1570 | } | |
1571 | __setup("zcache", enable_zcache); | |
1572 | ||
1573 | /* allow independent dynamic disabling of cleancache and frontswap */ | |
1574 | ||
1575 | static int use_cleancache = 1; | |
1576 | ||
1577 | static int __init no_cleancache(char *s) | |
1578 | { | |
1579 | use_cleancache = 0; | |
1580 | return 1; | |
1581 | } | |
1582 | ||
1583 | __setup("nocleancache", no_cleancache); | |
1584 | ||
1585 | static int use_frontswap = 1; | |
1586 | ||
1587 | static int __init no_frontswap(char *s) | |
1588 | { | |
1589 | use_frontswap = 0; | |
1590 | return 1; | |
1591 | } | |
1592 | ||
1593 | __setup("nofrontswap", no_frontswap); | |
1594 | ||
1595 | static int __init zcache_init(void) | |
1596 | { | |
1597 | #ifdef CONFIG_SYSFS | |
1598 | int ret = 0; | |
1599 | ||
1600 | ret = sysfs_create_group(mm_kobj, &zcache_attr_group); | |
1601 | if (ret) { | |
1602 | pr_err("zcache: can't create sysfs\n"); | |
1603 | goto out; | |
1604 | } | |
1605 | #endif /* CONFIG_SYSFS */ | |
1606 | #if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP) | |
1607 | if (zcache_enabled) { | |
1608 | unsigned int cpu; | |
1609 | ||
1610 | tmem_register_hostops(&zcache_hostops); | |
1611 | tmem_register_pamops(&zcache_pamops); | |
1612 | ret = register_cpu_notifier(&zcache_cpu_notifier_block); | |
1613 | if (ret) { | |
1614 | pr_err("zcache: can't register cpu notifier\n"); | |
1615 | goto out; | |
1616 | } | |
1617 | for_each_online_cpu(cpu) { | |
1618 | void *pcpu = (void *)(long)cpu; | |
1619 | zcache_cpu_notifier(&zcache_cpu_notifier_block, | |
1620 | CPU_UP_PREPARE, pcpu); | |
1621 | } | |
1622 | } | |
1623 | zcache_objnode_cache = kmem_cache_create("zcache_objnode", | |
1624 | sizeof(struct tmem_objnode), 0, 0, NULL); | |
1625 | zcache_obj_cache = kmem_cache_create("zcache_obj", | |
1626 | sizeof(struct tmem_obj), 0, 0, NULL); | |
1627 | #endif | |
1628 | #ifdef CONFIG_CLEANCACHE | |
1629 | if (zcache_enabled && use_cleancache) { | |
1630 | struct cleancache_ops old_ops; | |
1631 | ||
1632 | zbud_init(); | |
1633 | register_shrinker(&zcache_shrinker); | |
1634 | old_ops = zcache_cleancache_register_ops(); | |
1635 | pr_info("zcache: cleancache enabled using kernel " | |
1636 | "transcendent memory and compression buddies\n"); | |
1637 | if (old_ops.init_fs != NULL) | |
1638 | pr_warning("zcache: cleancache_ops overridden\n"); | |
1639 | } | |
1640 | #endif | |
1641 | #ifdef CONFIG_FRONTSWAP | |
1642 | if (zcache_enabled && use_frontswap) { | |
1643 | struct frontswap_ops old_ops; | |
1644 | ||
1645 | zcache_client.xvpool = xv_create_pool(); | |
1646 | if (zcache_client.xvpool == NULL) { | |
1647 | pr_err("zcache: can't create xvpool\n"); | |
1648 | goto out; | |
1649 | } | |
1650 | old_ops = zcache_frontswap_register_ops(); | |
1651 | pr_info("zcache: frontswap enabled using kernel " | |
1652 | "transcendent memory and xvmalloc\n"); | |
1653 | if (old_ops.init != NULL) | |
1654 | pr_warning("ktmem: frontswap_ops overridden"); | |
1655 | } | |
1656 | #endif | |
1657 | out: | |
1658 | return ret; | |
1659 | } | |
1660 | ||
1661 | module_init(zcache_init) |