Commit | Line | Data |
---|---|---|
faca2ef7 DM |
1 | /* |
2 | * zbud.c - Compression buddies allocator | |
3 | * | |
4 | * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp. | |
5 | * | |
6 | * Compression buddies ("zbud") provides for efficiently packing two | |
7 | * (or, possibly in the future, more) compressed pages ("zpages") into | |
8 | * a single "raw" pageframe and for tracking both zpages and pageframes | |
9 | * so that whole pageframes can be easily reclaimed in LRU-like order. | |
10 | * It is designed to be used in conjunction with transcendent memory | |
11 | * ("tmem"); for example separate LRU lists are maintained for persistent | |
12 | * vs. ephemeral pages. | |
13 | * | |
14 | * A zbudpage is an overlay for a struct page and thus each zbudpage | |
15 | * refers to a physical pageframe of RAM. When the caller passes a | |
16 | * struct page from the kernel's page allocator, zbud "transforms" it | |
17 | * to a zbudpage which sets/uses a different set of fields than the | |
18 | * struct-page and thus must "untransform" it back by reinitializing | |
19 | * certain fields before the struct-page can be freed. The fields | |
20 | * of a zbudpage include a page lock for controlling access to the | |
21 | * corresponding pageframe, and there is a size field for each zpage. | |
22 | * Each zbudpage also lives on two linked lists: a "budlist" which is | |
23 | * used to support efficient buddying of zpages; and an "lru" which | |
24 | * is used for reclaiming pageframes in approximately least-recently-used | |
25 | * order. | |
26 | * | |
27 | * A zbudpageframe is a pageframe divided up into aligned 64-byte "chunks" | |
28 | * which contain the compressed data for zero, one, or two zbuds. Contained | |
29 | * with the compressed data is a tmem_handle which is a key to allow | |
30 | * the same data to be found via the tmem interface so the zpage can | |
31 | * be invalidated (for ephemeral pages) or repatriated to the swap cache | |
32 | * (for persistent pages). The contents of a zbudpageframe must never | |
33 | * be accessed without holding the page lock for the corresponding | |
34 | * zbudpage and, to accommodate highmem machines, the contents may | |
35 | * only be examined or changed when kmapped. Thus, when in use, a | |
36 | * kmapped zbudpageframe is referred to in the zbud code as "void *zbpg". | |
37 | * | |
38 | * Note that the term "zbud" refers to the combination of a zpage and | |
39 | * a tmem_handle that is stored as one of possibly two "buddied" zpages; | |
40 | * it also generically refers to this allocator... sorry for any confusion. | |
41 | * | |
42 | * A zbudref is a pointer to a struct zbudpage (which can be cast to a | |
43 | * struct page), with the LSB either cleared or set to indicate, respectively, | |
44 | * the first or second zpage in the zbudpageframe. Since a zbudref can be | |
45 | * cast to a pointer, it is used as the tmem "pampd" pointer and uniquely | |
46 | * references a stored tmem page and so is the only zbud data structure | |
47 | * externally visible to zbud.c/zbud.h. | |
48 | * | |
49 | * Since we wish to reclaim entire pageframes but zpages may be randomly | |
50 | * added and deleted to any given pageframe, we approximate LRU by | |
51 | * promoting a pageframe to MRU when a zpage is added to it, but | |
52 | * leaving it at the current place in the list when a zpage is deleted | |
53 | * from it. As a side effect, zpages that are difficult to buddy (e.g. | |
54 | * very large pages) will be reclaimed faster than average, which seems | |
55 | * reasonable. | |
56 | * | |
57 | * In the current implementation, no more than two zpages may be stored in | |
58 | * any pageframe and no zpage ever crosses a pageframe boundary. While | |
59 | * other zpage allocation mechanisms may allow greater density, this two | |
60 | * zpage-per-pageframe limit both ensures simple reclaim of pageframes | |
61 | * (including garbage collection of references to the contents of those | |
62 | * pageframes from tmem data structures) AND avoids the need for compaction. | |
63 | * With additional complexity, zbud could be modified to support storing | |
64 | * up to three zpages per pageframe or, to handle larger average zpages, | |
65 | * up to three zpages per pair of pageframes, but it is not clear if the | |
66 | * additional complexity would be worth it. So consider it an exercise | |
67 | * for future developers. | |
68 | * | |
69 | * Note also that zbud does no page allocation or freeing. This is so | |
70 | * that the caller has complete control over and, for accounting, visibility | |
71 | * into if/when pages are allocated and freed. | |
72 | * | |
73 | * Finally, note that zbud limits the size of zpages it can store; the | |
74 | * caller must check the zpage size with zbud_max_buddy_size before | |
75 | * storing it, else BUGs will result. User beware. | |
76 | */ | |
77 | ||
78 | #include <linux/module.h> | |
79 | #include <linux/highmem.h> | |
80 | #include <linux/list.h> | |
81 | #include <linux/spinlock.h> | |
82 | #include <linux/pagemap.h> | |
83 | #include <linux/atomic.h> | |
84 | #include <linux/bug.h> | |
85 | #include "tmem.h" | |
86 | #include "zcache.h" | |
87 | #include "zbud.h" | |
88 | ||
89 | /* | |
90 | * We need to ensure that a struct zbudpage is never larger than a | |
91 | * struct page. This is checked with a BUG_ON in zbud_init. | |
92 | * | |
93 | * The unevictable field indicates that a zbud is being added to the | |
94 | * zbudpage. Since this is a two-phase process (due to tmem locking), | |
95 | * this field locks the zbudpage against eviction when a zbud match | |
96 | * or creation is in process. Since this addition process may occur | |
97 | * in parallel for two zbuds in one zbudpage, the field is a counter | |
98 | * that must not exceed two. | |
99 | */ | |
100 | struct zbudpage { | |
101 | union { | |
102 | struct page page; | |
103 | struct { | |
104 | unsigned long space_for_flags; | |
105 | struct { | |
e49ee36d DM |
106 | unsigned zbud0_size: PAGE_SHIFT; |
107 | unsigned zbud1_size: PAGE_SHIFT; | |
faca2ef7 DM |
108 | unsigned unevictable:2; |
109 | }; | |
110 | struct list_head budlist; | |
111 | struct list_head lru; | |
112 | }; | |
113 | }; | |
114 | }; | |
e49ee36d DM |
115 | #if (PAGE_SHIFT * 2) + 2 > BITS_PER_LONG |
116 | #error "zbud won't work for this arch, PAGE_SIZE is too large" | |
117 | #endif | |
faca2ef7 DM |
118 | |
/*
 * A zbudref is a struct zbudpage pointer whose low-order bit encodes
 * which buddy (0 or 1) is referenced; the union lets the same value be
 * manipulated either as a pointer or as an integer.
 */
struct zbudref {
	union {
		struct zbudpage *zbudpage;
		unsigned long zbudref;
	};
};
125 | ||
/* pageframes are carved into aligned 64-byte "chunks" for allocation */
#define CHUNK_SHIFT 6
#define CHUNK_SIZE (1 << CHUNK_SHIFT)
#define CHUNK_MASK (~(CHUNK_SIZE-1))
#define NCHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
#define MAX_CHUNK (NCHUNKS-1)
131 | ||
132 | /* | |
133 | * The following functions deal with the difference between struct | |
134 | * page and struct zbudpage. Note the hack of using the pageflags | |
135 | * from struct page; this is to avoid duplicating all the complex | |
136 | * pageflag macros. | |
137 | */ | |
138 | static inline void zbudpage_spin_lock(struct zbudpage *zbudpage) | |
139 | { | |
140 | struct page *page = (struct page *)zbudpage; | |
141 | ||
142 | while (unlikely(test_and_set_bit_lock(PG_locked, &page->flags))) { | |
143 | do { | |
144 | cpu_relax(); | |
145 | } while (test_bit(PG_locked, &page->flags)); | |
146 | } | |
147 | } | |
148 | ||
149 | static inline void zbudpage_spin_unlock(struct zbudpage *zbudpage) | |
150 | { | |
151 | struct page *page = (struct page *)zbudpage; | |
152 | ||
153 | clear_bit(PG_locked, &page->flags); | |
154 | } | |
155 | ||
/* Try to take the pageframe lock; returns nonzero on success. */
static inline int zbudpage_spin_trylock(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	return trylock_page(page);
}
160 | ||
/* Nonzero iff the pageframe lock (PG_locked) is currently held. */
static inline int zbudpage_is_locked(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	return PageLocked(page);
}
165 | ||
/*
 * Atomically map the pageframe underlying a zbudpage (highmem-safe);
 * pairs with kunmap_zbudpage_atomic().
 */
static inline void *kmap_zbudpage_atomic(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	return kmap_atomic(page);
}
170 | ||
171 | /* | |
172 | * A dying zbudpage is an ephemeral page in the process of being evicted. | |
173 | * Any data contained in the zbudpage is invalid and we are just waiting for | |
174 | * the tmem pampds to be invalidated before freeing the page | |
175 | */ | |
176 | static inline int zbudpage_is_dying(struct zbudpage *zbudpage) | |
177 | { | |
178 | struct page *page = (struct page *)zbudpage; | |
179 | ||
180 | return test_bit(PG_reclaim, &page->flags); | |
181 | } | |
182 | ||
183 | static inline void zbudpage_set_dying(struct zbudpage *zbudpage) | |
184 | { | |
185 | struct page *page = (struct page *)zbudpage; | |
186 | ||
187 | set_bit(PG_reclaim, &page->flags); | |
188 | } | |
189 | ||
190 | static inline void zbudpage_clear_dying(struct zbudpage *zbudpage) | |
191 | { | |
192 | struct page *page = (struct page *)zbudpage; | |
193 | ||
194 | clear_bit(PG_reclaim, &page->flags); | |
195 | } | |
196 | ||
197 | /* | |
198 | * A zombie zbudpage is a persistent page in the process of being evicted. | |
199 | * The data contained in the zbudpage is valid and we are just waiting for | |
200 | * the tmem pampds to be invalidated before freeing the page | |
201 | */ | |
202 | static inline int zbudpage_is_zombie(struct zbudpage *zbudpage) | |
203 | { | |
204 | struct page *page = (struct page *)zbudpage; | |
205 | ||
206 | return test_bit(PG_dirty, &page->flags); | |
207 | } | |
208 | ||
209 | static inline void zbudpage_set_zombie(struct zbudpage *zbudpage) | |
210 | { | |
211 | struct page *page = (struct page *)zbudpage; | |
212 | ||
213 | set_bit(PG_dirty, &page->flags); | |
214 | } | |
215 | ||
216 | static inline void zbudpage_clear_zombie(struct zbudpage *zbudpage) | |
217 | { | |
218 | struct page *page = (struct page *)zbudpage; | |
219 | ||
220 | clear_bit(PG_dirty, &page->flags); | |
221 | } | |
222 | ||
/* Undo kmap_zbudpage_atomic; zbpg must be the pointer it returned. */
static inline void kunmap_zbudpage_atomic(void *zbpg)
{
	kunmap_atomic(zbpg);
}
227 | ||
/*
 * zbud "translation" and helper functions
 */

/* Strip the buddy-number bit (LSB) from a zbudref, recovering the page. */
static inline struct zbudpage *zbudref_to_zbudpage(struct zbudref *zref)
{
	return (struct zbudpage *)((unsigned long)zref & ~1UL);
}
238 | ||
/* Combine a zbudpage pointer and a buddy number (0 or 1) into a zbudref. */
static inline struct zbudref *zbudpage_to_zbudref(struct zbudpage *zbudpage,
						unsigned budnum)
{
	unsigned long zbud = (unsigned long)zbudpage;

	BUG_ON(budnum > 1);
	return (struct zbudref *)(zbud | budnum);
}
247 | ||
/* Return which buddy (0 or 1) a zbudref refers to, from its LSB. */
static inline int zbudref_budnum(struct zbudref *zbudref)
{
	return (unsigned long)zbudref & 1UL;
}
253 | ||
254 | static inline unsigned zbud_max_size(void) | |
255 | { | |
256 | return MAX_CHUNK << CHUNK_SHIFT; | |
257 | } | |
258 | ||
259 | static inline unsigned zbud_size_to_chunks(unsigned size) | |
260 | { | |
261 | BUG_ON(size == 0 || size > zbud_max_size()); | |
262 | return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; | |
263 | } | |
264 | ||
265 | /* can only be used between kmap_zbudpage_atomic/kunmap_zbudpage_atomic! */ | |
266 | static inline char *zbud_data(void *zbpg, | |
267 | unsigned budnum, unsigned size) | |
268 | { | |
269 | char *p; | |
270 | ||
271 | BUG_ON(size == 0 || size > zbud_max_size()); | |
272 | p = (char *)zbpg; | |
273 | if (budnum == 1) | |
274 | p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); | |
275 | return p; | |
276 | } | |
277 | ||
/*
 * These are all informative and exposed through debugfs... except for
 * the arrays... anyone know how to do that? To avoid confusion for
 * debugfs viewers, some of these should also be atomic_long_t, but
 * I don't know how to expose atomics via debugfs either...
 */
/* current pageframes and zpages in use, by pool type (eph/pers) */
static ssize_t zbud_eph_pageframes;
static ssize_t zbud_pers_pageframes;
static ssize_t zbud_eph_zpages;
static ssize_t zbud_pers_zpages;
/* current total compressed bytes stored, by pool type */
static u64 zbud_eph_zbytes;
static u64 zbud_pers_zbytes;
/* cumulative eviction and storage counts */
static ssize_t zbud_eph_evicted_pageframes;
static ssize_t zbud_pers_evicted_pageframes;
static ssize_t zbud_eph_cumul_zpages;
static ssize_t zbud_pers_cumul_zpages;
static u64 zbud_eph_cumul_zbytes;
static u64 zbud_pers_cumul_zbytes;
/* histogram: zbuds ever created, bucketed by chunk count */
static ssize_t zbud_eph_cumul_chunk_counts[NCHUNKS];
static ssize_t zbud_pers_cumul_chunk_counts[NCHUNKS];
/* current pageframes holding two zbuds vs. exactly one zbud */
static ssize_t zbud_eph_buddied_count;
static ssize_t zbud_pers_buddied_count;
static ssize_t zbud_eph_unbuddied_count;
static ssize_t zbud_pers_unbuddied_count;
/* zombie counts: the atomics are authoritative; the ssize_t shadows */
/* exist only so the values can be exposed via debugfs */
static ssize_t zbud_eph_zombie_count;
static ssize_t zbud_pers_zombie_count;
static atomic_t zbud_eph_zombie_atomic;
static atomic_t zbud_pers_zombie_atomic;
306 | ||
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#define zdfs	debugfs_create_size_t
#define zdfs64	debugfs_create_u64
/*
 * Expose the (non-array) counters above under /sys/kernel/debug/zbud.
 * Returns 0 on success, -ENXIO if the directory cannot be created.
 */
static int zbud_debugfs_init(void)
{
	struct dentry *root = debugfs_create_dir("zbud", NULL);
	if (root == NULL)
		return -ENXIO;

	/*
	 * would be nice to dump the sizes of the unbuddied
	 * arrays, like was done with sysfs, but it doesn't
	 * look like debugfs is flexible enough to do that
	 */
	zdfs64("eph_zbytes", S_IRUGO, root, &zbud_eph_zbytes);
	zdfs64("eph_cumul_zbytes", S_IRUGO, root, &zbud_eph_cumul_zbytes);
	zdfs64("pers_zbytes", S_IRUGO, root, &zbud_pers_zbytes);
	zdfs64("pers_cumul_zbytes", S_IRUGO, root, &zbud_pers_cumul_zbytes);
	zdfs("eph_cumul_zpages", S_IRUGO, root, &zbud_eph_cumul_zpages);
	zdfs("eph_evicted_pageframes", S_IRUGO, root,
			&zbud_eph_evicted_pageframes);
	zdfs("eph_zpages", S_IRUGO, root, &zbud_eph_zpages);
	zdfs("eph_pageframes", S_IRUGO, root, &zbud_eph_pageframes);
	zdfs("eph_buddied_count", S_IRUGO, root, &zbud_eph_buddied_count);
	zdfs("eph_unbuddied_count", S_IRUGO, root, &zbud_eph_unbuddied_count);
	/* fix: pers_zombie_count was exposed but its ephemeral counterpart
	 * was not, although the counter is maintained; expose both */
	zdfs("eph_zombie_count", S_IRUGO, root, &zbud_eph_zombie_count);
	zdfs("pers_cumul_zpages", S_IRUGO, root, &zbud_pers_cumul_zpages);
	zdfs("pers_evicted_pageframes", S_IRUGO, root,
			&zbud_pers_evicted_pageframes);
	zdfs("pers_zpages", S_IRUGO, root, &zbud_pers_zpages);
	zdfs("pers_pageframes", S_IRUGO, root, &zbud_pers_pageframes);
	zdfs("pers_buddied_count", S_IRUGO, root, &zbud_pers_buddied_count);
	zdfs("pers_unbuddied_count", S_IRUGO, root, &zbud_pers_unbuddied_count);
	zdfs("pers_zombie_count", S_IRUGO, root, &zbud_pers_zombie_count);
	return 0;
}
#undef	zdfs
#undef	zdfs64
#endif
346 | ||
/* protects the buddied list and all unbuddied lists */
static DEFINE_SPINLOCK(zbud_eph_lists_lock);
static DEFINE_SPINLOCK(zbud_pers_lists_lock);

/* anchor for a list of unbuddied zbudpages with the same used-chunk count */
struct zbud_unbuddied {
	struct list_head list;
	unsigned count;		/* number of zbudpages on this list */
};

/* list N contains pages with N chunks USED and NCHUNKS-N unused */
/* element 0 is never used but optimizing that isn't worth it */
static struct zbud_unbuddied zbud_eph_unbuddied[NCHUNKS];
static struct zbud_unbuddied zbud_pers_unbuddied[NCHUNKS];
/* approximate-LRU ordering of pageframes, used for eviction */
static LIST_HEAD(zbud_eph_lru_list);
static LIST_HEAD(zbud_pers_lru_list);
/* pageframes currently holding two zbuds */
static LIST_HEAD(zbud_eph_buddied_list);
static LIST_HEAD(zbud_pers_buddied_list);
/* pageframes awaiting tmem pampd invalidation before being freed */
static LIST_HEAD(zbud_eph_zombie_list);
static LIST_HEAD(zbud_pers_zombie_list);
366 | ||
367 | /* | |
368 | * Given a struct page, transform it to a zbudpage so that it can be | |
369 | * used by zbud and initialize fields as necessary. | |
370 | */ | |
371 | static inline struct zbudpage *zbud_init_zbudpage(struct page *page, bool eph) | |
372 | { | |
373 | struct zbudpage *zbudpage = (struct zbudpage *)page; | |
374 | ||
375 | BUG_ON(page == NULL); | |
376 | INIT_LIST_HEAD(&zbudpage->budlist); | |
377 | INIT_LIST_HEAD(&zbudpage->lru); | |
378 | zbudpage->zbud0_size = 0; | |
379 | zbudpage->zbud1_size = 0; | |
380 | zbudpage->unevictable = 0; | |
381 | if (eph) | |
382 | zbud_eph_pageframes++; | |
383 | else | |
384 | zbud_pers_pageframes++; | |
385 | return zbudpage; | |
386 | } | |
387 | ||
/* "Transform" a zbudpage back to a struct page suitable to free. */
static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage,
						bool eph)
{
	struct page *page = (struct page *)zbudpage;

	/* caller must pass the page locked, delisted, and fully emptied */
	BUG_ON(!list_empty(&zbudpage->budlist));
	BUG_ON(!list_empty(&zbudpage->lru));
	BUG_ON(zbudpage->zbud0_size != 0);
	BUG_ON(zbudpage->zbud1_size != 0);
	BUG_ON(!PageLocked(page));
	BUG_ON(zbudpage->unevictable != 0);
	BUG_ON(zbudpage_is_dying(zbudpage));
	BUG_ON(zbudpage_is_zombie(zbudpage));
	if (eph)
		zbud_eph_pageframes--;
	else
		zbud_pers_pageframes--;
	zbudpage_spin_unlock(zbudpage);
	/* reinitialize the struct-page fields zbud overlaid so the */
	/* page allocator will accept the page back when it is freed */
	reset_page_mapcount(page);
	init_page_count(page);
	page->index = 0;
	return page;
}
412 | ||
413 | /* Mark a zbud as unused and do accounting */ | |
414 | static inline void zbud_unuse_zbud(struct zbudpage *zbudpage, | |
415 | int budnum, bool eph) | |
416 | { | |
417 | unsigned size; | |
418 | ||
419 | BUG_ON(!zbudpage_is_locked(zbudpage)); | |
420 | if (budnum == 0) { | |
421 | size = zbudpage->zbud0_size; | |
422 | zbudpage->zbud0_size = 0; | |
423 | } else { | |
424 | size = zbudpage->zbud1_size; | |
425 | zbudpage->zbud1_size = 0; | |
426 | } | |
427 | if (eph) { | |
428 | zbud_eph_zbytes -= size; | |
429 | zbud_eph_zpages--; | |
430 | } else { | |
431 | zbud_pers_zbytes -= size; | |
432 | zbud_pers_zpages--; | |
433 | } | |
434 | } | |
435 | ||
/*
 * Given a zbudpage/budnum/size, a tmem handle, and a kmapped pointer
 * to some data, set up the zbud appropriately including data copying
 * and accounting. Note that if cdata is NULL, the data copying is
 * skipped. (This is useful for lazy writes such as for RAMster.)
 */
static void zbud_init_zbud(struct zbudpage *zbudpage, struct tmem_handle *th,
			bool eph, void *cdata,
			unsigned budnum, unsigned size)
{
	char *to;
	void *zbpg;
	struct tmem_handle *to_th;
	unsigned nchunks = zbud_size_to_chunks(size);

	BUG_ON(!zbudpage_is_locked(zbudpage));
	zbpg = kmap_zbudpage_atomic(zbudpage);
	to = zbud_data(zbpg, budnum, size);
	/* the tmem_handle is stored in-band at the front of the zbud so */
	/* that eviction can later find and flush the tmem-side references */
	to_th = (struct tmem_handle *)to;
	to_th->index = th->index;
	to_th->oid = th->oid;
	to_th->pool_id = th->pool_id;
	to_th->client_id = th->client_id;
	to += sizeof(struct tmem_handle);
	if (cdata != NULL)
		memcpy(to, cdata, size - sizeof(struct tmem_handle));
	kunmap_zbudpage_atomic(zbpg);
	/* size fields live in the zbudpage overlay, not the mapped data, */
	/* so they may be written after the kunmap */
	if (budnum == 0)
		zbudpage->zbud0_size = size;
	else
		zbudpage->zbud1_size = size;
	if (eph) {
		zbud_eph_cumul_chunk_counts[nchunks]++;
		zbud_eph_zpages++;
		zbud_eph_cumul_zpages++;
		zbud_eph_zbytes += size;
		zbud_eph_cumul_zbytes += size;
	} else {
		zbud_pers_cumul_chunk_counts[nchunks]++;
		zbud_pers_zpages++;
		zbud_pers_cumul_zpages++;
		zbud_pers_zbytes += size;
		zbud_pers_cumul_zbytes += size;
	}
}
481 | ||
/*
 * Given a locked dying zbudpage, read out the tmem handles from the data,
 * unlock the page, then use the handles to tell tmem to flush out its
 * references
 */
static void zbud_evict_tmem(struct zbudpage *zbudpage)
{
	int i, j;
	uint32_t pool_id[2], client_id[2];
	uint32_t index[2];
	struct tmem_oid oid[2];
	struct tmem_pool *pool;
	void *zbpg;
	struct tmem_handle *th;
	unsigned size;

	/* read out the tmem handles from the data and set aside */
	zbpg = kmap_zbudpage_atomic(zbudpage);
	for (i = 0, j = 0; i < 2; i++) {
		size = (i == 0) ? zbudpage->zbud0_size : zbudpage->zbud1_size;
		if (size) {
			th = (struct tmem_handle *)zbud_data(zbpg, i, size);
			client_id[j] = th->client_id;
			pool_id[j] = th->pool_id;
			oid[j] = th->oid;
			index[j] = th->index;
			j++;
			/*
			 * NOTE(review): eph is hard-coded true here, so the
			 * ephemeral byte/zpage counters are adjusted
			 * unconditionally -- confirm this path is only ever
			 * reached for ephemeral pageframes.
			 */
			zbud_unuse_zbud(zbudpage, i, true);
		}
	}
	kunmap_zbudpage_atomic(zbpg);
	zbudpage_spin_unlock(zbudpage);
	/* zbudpage is now an unlocked dying... tell tmem to flush pointers */
	for (i = 0; i < j; i++) {
		pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
		if (pool != NULL) {
			tmem_flush_page(pool, &oid[i], index[i]);
			zcache_put_pool(pool);
		}
	}
}
523 | ||
/*
 * Externally callable zbud handling routines.
 */

/*
 * Return the maximum size compressed page that can be stored (secretly
 * setting aside space for the in-band tmem handle).
 */
unsigned int zbud_max_buddy_size(void)
{
	return zbud_max_size() - sizeof(struct tmem_handle);
}
536 | ||
537 | /* | |
538 | * Given a zbud reference, free the corresponding zbud from all lists, | |
539 | * mark it as unused, do accounting, and if the freeing of the zbud | |
540 | * frees up an entire pageframe, return it to the caller (else NULL). | |
541 | */ | |
542 | struct page *zbud_free_and_delist(struct zbudref *zref, bool eph, | |
543 | unsigned int *zsize, unsigned int *zpages) | |
544 | { | |
545 | unsigned long budnum = zbudref_budnum(zref); | |
546 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
547 | struct page *page = NULL; | |
548 | unsigned chunks, bud_size, other_bud_size; | |
549 | spinlock_t *lists_lock = | |
550 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
551 | struct zbud_unbuddied *unbud = | |
552 | eph ? zbud_eph_unbuddied : zbud_pers_unbuddied; | |
553 | ||
554 | ||
555 | spin_lock(lists_lock); | |
556 | zbudpage_spin_lock(zbudpage); | |
557 | if (zbudpage_is_dying(zbudpage)) { | |
558 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
559 | zbudpage_spin_unlock(zbudpage); | |
560 | spin_unlock(lists_lock); | |
561 | *zpages = 0; | |
562 | *zsize = 0; | |
563 | goto out; | |
564 | } | |
565 | if (budnum == 0) { | |
566 | bud_size = zbudpage->zbud0_size; | |
567 | other_bud_size = zbudpage->zbud1_size; | |
568 | } else { | |
569 | bud_size = zbudpage->zbud1_size; | |
570 | other_bud_size = zbudpage->zbud0_size; | |
571 | } | |
572 | *zsize = bud_size - sizeof(struct tmem_handle); | |
573 | *zpages = 1; | |
574 | zbud_unuse_zbud(zbudpage, budnum, eph); | |
575 | if (other_bud_size == 0) { /* was unbuddied: unlist and free */ | |
576 | chunks = zbud_size_to_chunks(bud_size) ; | |
577 | if (zbudpage_is_zombie(zbudpage)) { | |
578 | if (eph) | |
579 | zbud_pers_zombie_count = | |
580 | atomic_dec_return(&zbud_eph_zombie_atomic); | |
581 | else | |
582 | zbud_pers_zombie_count = | |
583 | atomic_dec_return(&zbud_pers_zombie_atomic); | |
584 | zbudpage_clear_zombie(zbudpage); | |
585 | } else { | |
586 | BUG_ON(list_empty(&unbud[chunks].list)); | |
587 | list_del_init(&zbudpage->budlist); | |
588 | unbud[chunks].count--; | |
589 | } | |
590 | list_del_init(&zbudpage->lru); | |
591 | spin_unlock(lists_lock); | |
592 | if (eph) | |
593 | zbud_eph_unbuddied_count--; | |
594 | else | |
595 | zbud_pers_unbuddied_count--; | |
596 | page = zbud_unuse_zbudpage(zbudpage, eph); | |
597 | } else { /* was buddied: move remaining buddy to unbuddied list */ | |
598 | chunks = zbud_size_to_chunks(other_bud_size) ; | |
599 | if (!zbudpage_is_zombie(zbudpage)) { | |
600 | list_del_init(&zbudpage->budlist); | |
601 | list_add_tail(&zbudpage->budlist, &unbud[chunks].list); | |
602 | unbud[chunks].count++; | |
603 | } | |
604 | if (eph) { | |
605 | zbud_eph_buddied_count--; | |
606 | zbud_eph_unbuddied_count++; | |
607 | } else { | |
608 | zbud_pers_unbuddied_count++; | |
609 | zbud_pers_buddied_count--; | |
610 | } | |
611 | /* don't mess with lru, no need to move it */ | |
612 | zbudpage_spin_unlock(zbudpage); | |
613 | spin_unlock(lists_lock); | |
614 | } | |
615 | out: | |
616 | return page; | |
617 | } | |
618 | ||
/*
 * Given a tmem handle, and a kmapped pointer to compressed data of
 * the given size, try to find an unbuddied zbudpage in which to
 * create a zbud. If found, put it there, mark the zbudpage unevictable,
 * and return a zbudref to it. Else return NULL.
 */
struct zbudref *zbud_match_prep(struct tmem_handle *th, bool eph,
				void *cdata, unsigned size)
{
	struct zbudpage *zbudpage = NULL, *zbudpage2;
	unsigned long budnum = 0UL;
	unsigned nchunks;
	int i, found_good_buddy = 0;
	spinlock_t *lists_lock =
		eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
	struct zbud_unbuddied *unbud =
		eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;

	size += sizeof(struct tmem_handle);	/* handle is stored in-band */
	nchunks = zbud_size_to_chunks(size);
	/*
	 * Search the unbuddied lists from best fit downward: list i holds
	 * pages with i chunks USED, so any i <= MAX_CHUNK - nchunks + 1
	 * still has room for this zbud.
	 */
	for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
		spin_lock(lists_lock);
		if (!list_empty(&unbud[i].list)) {
			list_for_each_entry_safe(zbudpage, zbudpage2,
				    &unbud[i].list, budlist) {
				/* skip candidates whose page lock is held */
				if (zbudpage_spin_trylock(zbudpage)) {
					found_good_buddy = i;
					goto found_unbuddied;
				}
			}
		}
		spin_unlock(lists_lock);
	}
	zbudpage = NULL;
	goto out;

found_unbuddied:
	/* here we hold both lists_lock and the zbudpage lock */
	BUG_ON(!zbudpage_is_locked(zbudpage));
	/* exactly one of the two buddy slots must be free */
	BUG_ON(!((zbudpage->zbud0_size == 0) ^ (zbudpage->zbud1_size == 0)));
	if (zbudpage->zbud0_size == 0)
		budnum = 0UL;
	else if (zbudpage->zbud1_size == 0)
		budnum = 1UL;
	/* page becomes buddied: move lists and do the accounting */
	list_del_init(&zbudpage->budlist);
	if (eph) {
		list_add_tail(&zbudpage->budlist, &zbud_eph_buddied_list);
		unbud[found_good_buddy].count--;
		zbud_eph_unbuddied_count--;
		zbud_eph_buddied_count++;
		/* "promote" raw zbudpage to most-recently-used */
		list_del_init(&zbudpage->lru);
		list_add_tail(&zbudpage->lru, &zbud_eph_lru_list);
	} else {
		list_add_tail(&zbudpage->budlist, &zbud_pers_buddied_list);
		unbud[found_good_buddy].count--;
		zbud_pers_unbuddied_count--;
		zbud_pers_buddied_count++;
		/* "promote" raw zbudpage to most-recently-used */
		list_del_init(&zbudpage->lru);
		list_add_tail(&zbudpage->lru, &zbud_pers_lru_list);
	}
	zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size);
	/* hold off eviction until zbud_create_finish() is called */
	zbudpage->unevictable++;
	BUG_ON(zbudpage->unevictable == 3);
	zbudpage_spin_unlock(zbudpage);
	spin_unlock(lists_lock);
out:
	return zbudpage_to_zbudref(zbudpage, budnum);

}
689 | ||
/*
 * Given a tmem handle, and a kmapped pointer to compressed data of
 * the given size, and a newly allocated struct page, create an unevictable
 * zbud in that new page and return a zbudref to it. Returns NULL if the
 * data is too large or no page was supplied.
 */
struct zbudref *zbud_create_prep(struct tmem_handle *th, bool eph,
					void *cdata, unsigned size,
					struct page *newpage)
{
	struct zbudpage *zbudpage;
	unsigned long budnum = 0;
	unsigned nchunks;
	spinlock_t *lists_lock =
		eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
	struct zbud_unbuddied *unbud =
		eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;

#if 0
	/* this may be worth it later to support decompress-in-place? */
	static unsigned long counter;
	budnum = counter++ & 1;	/* alternate using zbud0 and zbud1 */
#endif

	/* payload plus the in-band tmem handle must fit in one pageframe */
	if (size > zbud_max_buddy_size())
		return NULL;
	if (newpage == NULL)
		return NULL;

	size += sizeof(struct tmem_handle);
	nchunks = zbud_size_to_chunks(size) ;
	spin_lock(lists_lock);
	zbudpage = zbud_init_zbudpage(newpage, eph);
	zbudpage_spin_lock(zbudpage);
	/* a fresh page holds only this one zbud: file it as unbuddied */
	list_add_tail(&zbudpage->budlist, &unbud[nchunks].list);
	if (eph) {
		list_add_tail(&zbudpage->lru, &zbud_eph_lru_list);
		zbud_eph_unbuddied_count++;
	} else {
		list_add_tail(&zbudpage->lru, &zbud_pers_lru_list);
		zbud_pers_unbuddied_count++;
	}
	unbud[nchunks].count++;
	zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size);
	/* hold off eviction until zbud_create_finish() is called */
	zbudpage->unevictable++;
	BUG_ON(zbudpage->unevictable == 3);
	zbudpage_spin_unlock(zbudpage);
	spin_unlock(lists_lock);
	return zbudpage_to_zbudref(zbudpage, budnum);
}
739 | ||
740 | /* | |
741 | * Finish creation of a zbud by, assuming another zbud isn't being created | |
742 | * in parallel, marking it evictable. | |
743 | */ | |
744 | void zbud_create_finish(struct zbudref *zref, bool eph) | |
745 | { | |
746 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
747 | spinlock_t *lists_lock = | |
748 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
749 | ||
750 | spin_lock(lists_lock); | |
751 | zbudpage_spin_lock(zbudpage); | |
752 | BUG_ON(zbudpage_is_dying(zbudpage)); | |
753 | zbudpage->unevictable--; | |
754 | BUG_ON((int)zbudpage->unevictable < 0); | |
755 | zbudpage_spin_unlock(zbudpage); | |
756 | spin_unlock(lists_lock); | |
757 | } | |
758 | ||
/*
 * Given a zbudref and a struct page, decompress the data from
 * the zbud into the physical page represented by the struct page
 * by upcalling to zcache_decompress
 *
 * Returns 0 on success, -1 if the zbudpage is dying (being evicted).
 */
int zbud_decompress(struct page *data_page, struct zbudref *zref, bool eph,
		    void (*decompress)(char *, unsigned int, char *))
{
	struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
	unsigned long budnum = zbudref_budnum(zref);
	void *zbpg;
	char *to_va, *from_va;
	unsigned size;
	int ret = -1;
	spinlock_t *lists_lock =
		eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;

	spin_lock(lists_lock);
	zbudpage_spin_lock(zbudpage);
	if (zbudpage_is_dying(zbudpage)) {
		/* ignore dying zbudpage... see zbud_evict_pageframe_lru() */
		goto out;
	}
	zbpg = kmap_zbudpage_atomic(zbudpage);
	to_va = kmap_atomic(data_page);
	if (budnum == 0)
		size = zbudpage->zbud0_size;
	else
		size = zbudpage->zbud1_size;
	BUG_ON(size == 0 || size > zbud_max_size());
	/* skip the in-band tmem handle; only the payload is compressed data */
	from_va = zbud_data(zbpg, budnum, size);
	from_va += sizeof(struct tmem_handle);
	size -= sizeof(struct tmem_handle);
	decompress(from_va, size, to_va);
	/* unmap in LIFO order of the kmap_atomic calls above */
	kunmap_atomic(to_va);
	kunmap_zbudpage_atomic(zbpg);
	ret = 0;
out:
	zbudpage_spin_unlock(zbudpage);
	spin_unlock(lists_lock);
	return ret;
}
801 | ||
802 | /* | |
803 | * Given a zbudref and a kernel pointer, copy the data from | |
804 | * the zbud to the kernel pointer. | |
805 | */ | |
806 | int zbud_copy_from_zbud(char *to_va, struct zbudref *zref, | |
807 | size_t *sizep, bool eph) | |
808 | { | |
809 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
810 | unsigned long budnum = zbudref_budnum(zref); | |
811 | void *zbpg; | |
812 | char *from_va; | |
813 | unsigned size; | |
814 | int ret = -1; | |
815 | spinlock_t *lists_lock = | |
816 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
817 | ||
818 | spin_lock(lists_lock); | |
819 | zbudpage_spin_lock(zbudpage); | |
820 | if (zbudpage_is_dying(zbudpage)) { | |
821 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
822 | goto out; | |
823 | } | |
824 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
825 | if (budnum == 0) | |
826 | size = zbudpage->zbud0_size; | |
827 | else | |
828 | size = zbudpage->zbud1_size; | |
829 | BUG_ON(size == 0 || size > zbud_max_size()); | |
830 | from_va = zbud_data(zbpg, budnum, size); | |
831 | from_va += sizeof(struct tmem_handle); | |
832 | size -= sizeof(struct tmem_handle); | |
833 | *sizep = size; | |
834 | memcpy(to_va, from_va, size); | |
835 | ||
836 | kunmap_zbudpage_atomic(zbpg); | |
837 | ret = 0; | |
838 | out: | |
839 | zbudpage_spin_unlock(zbudpage); | |
840 | spin_unlock(lists_lock); | |
841 | return ret; | |
842 | } | |
843 | ||
844 | /* | |
845 | * Given a zbudref and a kernel pointer, copy the data from | |
846 | * the kernel pointer to the zbud. | |
847 | */ | |
848 | int zbud_copy_to_zbud(struct zbudref *zref, char *from_va, bool eph) | |
849 | { | |
850 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
851 | unsigned long budnum = zbudref_budnum(zref); | |
852 | void *zbpg; | |
853 | char *to_va; | |
854 | unsigned size; | |
855 | int ret = -1; | |
856 | spinlock_t *lists_lock = | |
857 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
858 | ||
859 | spin_lock(lists_lock); | |
860 | zbudpage_spin_lock(zbudpage); | |
861 | if (zbudpage_is_dying(zbudpage)) { | |
862 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
863 | goto out; | |
864 | } | |
865 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
866 | if (budnum == 0) | |
867 | size = zbudpage->zbud0_size; | |
868 | else | |
869 | size = zbudpage->zbud1_size; | |
870 | BUG_ON(size == 0 || size > zbud_max_size()); | |
871 | to_va = zbud_data(zbpg, budnum, size); | |
872 | to_va += sizeof(struct tmem_handle); | |
873 | size -= sizeof(struct tmem_handle); | |
874 | memcpy(to_va, from_va, size); | |
875 | ||
876 | kunmap_zbudpage_atomic(zbpg); | |
877 | ret = 0; | |
878 | out: | |
879 | zbudpage_spin_unlock(zbudpage); | |
880 | spin_unlock(lists_lock); | |
881 | return ret; | |
882 | } | |
883 | ||
884 | /* | |
885 | * Choose an ephemeral LRU zbudpage that is evictable (not locked), ensure | |
886 | * there are no references to it remaining, and return the now unused | |
887 | * (and re-init'ed) struct page and the total amount of compressed | |
888 | * data that was evicted. | |
889 | */ | |
890 | struct page *zbud_evict_pageframe_lru(unsigned int *zsize, unsigned int *zpages) | |
891 | { | |
892 | struct zbudpage *zbudpage = NULL, *zbudpage2; | |
893 | struct zbud_unbuddied *unbud = zbud_eph_unbuddied; | |
894 | struct page *page = NULL; | |
895 | bool irqs_disabled = irqs_disabled(); | |
896 | ||
897 | /* | |
898 | * Since this can be called indirectly from cleancache_put, which | |
899 | * has interrupts disabled, as well as frontswap_put, which does not, | |
900 | * we need to be able to handle both cases, even though it is ugly. | |
901 | */ | |
902 | if (irqs_disabled) | |
903 | spin_lock(&zbud_eph_lists_lock); | |
904 | else | |
905 | spin_lock_bh(&zbud_eph_lists_lock); | |
906 | *zsize = 0; | |
907 | if (list_empty(&zbud_eph_lru_list)) | |
908 | goto unlock_out; | |
909 | list_for_each_entry_safe(zbudpage, zbudpage2, &zbud_eph_lru_list, lru) { | |
910 | /* skip a locked zbudpage */ | |
911 | if (unlikely(!zbudpage_spin_trylock(zbudpage))) | |
912 | continue; | |
913 | /* skip an unevictable zbudpage */ | |
914 | if (unlikely(zbudpage->unevictable != 0)) { | |
915 | zbudpage_spin_unlock(zbudpage); | |
916 | continue; | |
917 | } | |
918 | /* got a locked evictable page */ | |
919 | goto evict_page; | |
920 | ||
921 | } | |
922 | unlock_out: | |
923 | /* no unlocked evictable pages, give up */ | |
924 | if (irqs_disabled) | |
925 | spin_unlock(&zbud_eph_lists_lock); | |
926 | else | |
927 | spin_unlock_bh(&zbud_eph_lists_lock); | |
928 | goto out; | |
929 | ||
930 | evict_page: | |
931 | list_del_init(&zbudpage->budlist); | |
932 | list_del_init(&zbudpage->lru); | |
933 | zbudpage_set_dying(zbudpage); | |
934 | /* | |
935 | * the zbudpage is now "dying" and attempts to read, write, | |
936 | * or delete data from it will be ignored | |
937 | */ | |
938 | if (zbudpage->zbud0_size != 0 && zbudpage->zbud1_size != 0) { | |
939 | *zsize = zbudpage->zbud0_size + zbudpage->zbud1_size - | |
940 | (2 * sizeof(struct tmem_handle)); | |
941 | *zpages = 2; | |
942 | } else if (zbudpage->zbud0_size != 0) { | |
943 | unbud[zbud_size_to_chunks(zbudpage->zbud0_size)].count--; | |
944 | *zsize = zbudpage->zbud0_size - sizeof(struct tmem_handle); | |
945 | *zpages = 1; | |
946 | } else if (zbudpage->zbud1_size != 0) { | |
947 | unbud[zbud_size_to_chunks(zbudpage->zbud1_size)].count--; | |
948 | *zsize = zbudpage->zbud1_size - sizeof(struct tmem_handle); | |
949 | *zpages = 1; | |
950 | } else { | |
951 | BUG(); | |
952 | } | |
953 | spin_unlock(&zbud_eph_lists_lock); | |
954 | zbud_eph_evicted_pageframes++; | |
955 | if (*zpages == 1) | |
956 | zbud_eph_unbuddied_count--; | |
957 | else | |
958 | zbud_eph_buddied_count--; | |
959 | zbud_evict_tmem(zbudpage); | |
960 | zbudpage_spin_lock(zbudpage); | |
961 | zbudpage_clear_dying(zbudpage); | |
962 | page = zbud_unuse_zbudpage(zbudpage, true); | |
963 | if (!irqs_disabled) | |
964 | local_bh_enable(); | |
965 | out: | |
966 | return page; | |
967 | } | |
968 | ||
969 | /* | |
970 | * Choose a persistent LRU zbudpage that is evictable (not locked), zombify it, | |
971 | * read the tmem_handle(s) out of it into the passed array, and return the | |
972 | * number of zbuds. Caller must perform necessary tmem functions and, | |
973 | * indirectly, zbud functions to fetch any valid data and cause the | |
974 | * now-zombified zbudpage to eventually be freed. We track the zombified | |
975 | * zbudpage count so it is possible to observe if there is a leak. | |
976 | FIXME: describe (ramster) case where data pointers are passed in for memcpy | |
977 | */ | |
978 | unsigned int zbud_make_zombie_lru(struct tmem_handle *th, unsigned char **data, | |
979 | unsigned int *zsize, bool eph) | |
980 | { | |
981 | struct zbudpage *zbudpage = NULL, *zbudpag2; | |
982 | struct tmem_handle *thfrom; | |
983 | char *from_va; | |
984 | void *zbpg; | |
985 | unsigned size; | |
986 | int ret = 0, i; | |
987 | spinlock_t *lists_lock = | |
988 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
989 | struct list_head *lru_list = | |
990 | eph ? &zbud_eph_lru_list : &zbud_pers_lru_list; | |
991 | ||
992 | spin_lock_bh(lists_lock); | |
993 | if (list_empty(lru_list)) | |
994 | goto out; | |
995 | list_for_each_entry_safe(zbudpage, zbudpag2, lru_list, lru) { | |
996 | /* skip a locked zbudpage */ | |
997 | if (unlikely(!zbudpage_spin_trylock(zbudpage))) | |
998 | continue; | |
999 | /* skip an unevictable zbudpage */ | |
1000 | if (unlikely(zbudpage->unevictable != 0)) { | |
1001 | zbudpage_spin_unlock(zbudpage); | |
1002 | continue; | |
1003 | } | |
1004 | /* got a locked evictable page */ | |
1005 | goto zombify_page; | |
1006 | } | |
1007 | /* no unlocked evictable pages, give up */ | |
1008 | goto out; | |
1009 | ||
1010 | zombify_page: | |
1011 | /* got an unlocked evictable page, zombify it */ | |
1012 | list_del_init(&zbudpage->budlist); | |
1013 | zbudpage_set_zombie(zbudpage); | |
1014 | /* FIXME what accounting do I need to do here? */ | |
1015 | list_del_init(&zbudpage->lru); | |
1016 | if (eph) { | |
1017 | list_add_tail(&zbudpage->lru, &zbud_eph_zombie_list); | |
1018 | zbud_eph_zombie_count = | |
1019 | atomic_inc_return(&zbud_eph_zombie_atomic); | |
1020 | } else { | |
1021 | list_add_tail(&zbudpage->lru, &zbud_pers_zombie_list); | |
1022 | zbud_pers_zombie_count = | |
1023 | atomic_inc_return(&zbud_pers_zombie_atomic); | |
1024 | } | |
1025 | /* FIXME what accounting do I need to do here? */ | |
1026 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
1027 | for (i = 0; i < 2; i++) { | |
1028 | size = (i == 0) ? zbudpage->zbud0_size : zbudpage->zbud1_size; | |
1029 | if (size) { | |
1030 | from_va = zbud_data(zbpg, i, size); | |
1031 | thfrom = (struct tmem_handle *)from_va; | |
1032 | from_va += sizeof(struct tmem_handle); | |
1033 | size -= sizeof(struct tmem_handle); | |
1034 | if (th != NULL) | |
1035 | th[ret] = *thfrom; | |
1036 | if (data != NULL) | |
1037 | memcpy(data[ret], from_va, size); | |
1038 | if (zsize != NULL) | |
1039 | *zsize++ = size; | |
1040 | ret++; | |
1041 | } | |
1042 | } | |
1043 | kunmap_zbudpage_atomic(zbpg); | |
1044 | zbudpage_spin_unlock(zbudpage); | |
1045 | out: | |
1046 | spin_unlock_bh(lists_lock); | |
1047 | return ret; | |
1048 | } | |
1049 | ||
d489082a | 1050 | void zbud_init(void) |
faca2ef7 DM |
1051 | { |
1052 | int i; | |
1053 | ||
1054 | #ifdef CONFIG_DEBUG_FS | |
1055 | zbud_debugfs_init(); | |
1056 | #endif | |
1057 | BUG_ON((sizeof(struct tmem_handle) * 2 > CHUNK_SIZE)); | |
1058 | BUG_ON(sizeof(struct zbudpage) > sizeof(struct page)); | |
1059 | for (i = 0; i < NCHUNKS; i++) { | |
1060 | INIT_LIST_HEAD(&zbud_eph_unbuddied[i].list); | |
1061 | INIT_LIST_HEAD(&zbud_pers_unbuddied[i].list); | |
1062 | } | |
1063 | } |