/*
 * zbud.c - Compression buddies allocator
 *
 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
 *
 * Compression buddies ("zbud") provides for efficiently packing two
 * (or, possibly in the future, more) compressed pages ("zpages") into
 * a single "raw" pageframe and for tracking both zpages and pageframes
 * so that whole pageframes can be easily reclaimed in LRU-like order.
 * It is designed to be used in conjunction with transcendent memory
 * ("tmem"); for example, separate LRU lists are maintained for persistent
 * vs. ephemeral pages.
 *
 * A zbudpage is an overlay for a struct page and thus each zbudpage
 * refers to a physical pageframe of RAM. When the caller passes a
 * struct page from the kernel's page allocator, zbud "transforms" it
 * to a zbudpage which sets/uses a different set of fields than the
 * struct-page and thus must "untransform" it back by reinitializing
 * certain fields before the struct-page can be freed. The fields
 * of a zbudpage include a page lock for controlling access to the
 * corresponding pageframe, and there is a size field for each zpage.
 * Each zbudpage also lives on two linked lists: a "budlist" which is
 * used to support efficient buddying of zpages; and an "lru" which
 * is used for reclaiming pageframes in approximately least-recently-used
 * order.
 *
 * A zbudpageframe is a pageframe divided up into aligned 64-byte "chunks"
 * which contain the compressed data for zero, one, or two zbuds. Contained
 * with the compressed data is a tmem_handle which is a key to allow
 * the same data to be found via the tmem interface so the zpage can
 * be invalidated (for ephemeral pages) or repatriated to the swap cache
 * (for persistent pages). The contents of a zbudpageframe must never
 * be accessed without holding the page lock for the corresponding
 * zbudpage and, to accommodate highmem machines, the contents may
 * only be examined or changed when kmapped. Thus, when in use, a
 * kmapped zbudpageframe is referred to in the zbud code as "void *zbpg".
 *
 * Note that the term "zbud" refers to the combination of a zpage and
 * a tmem_handle that is stored as one of possibly two "buddied" zpages;
 * it also generically refers to this allocator... sorry for any confusion.
 *
 * A zbudref is a pointer to a struct zbudpage (which can be cast to a
 * struct page), with the LSB either cleared or set to indicate, respectively,
 * the first or second zpage in the zbudpageframe. Since a zbudref can be
 * cast to a pointer, it is used as the tmem "pampd" pointer and uniquely
 * references a stored tmem page and so is the only zbud data structure
 * externally visible to zbud.c/zbud.h.
 *
 * Since we wish to reclaim entire pageframes but zpages may be randomly
 * added to and deleted from any given pageframe, we approximate LRU by
 * promoting a pageframe to MRU when a zpage is added to it, but
 * leaving it at the current place in the list when a zpage is deleted
 * from it. As a side effect, zpages that are difficult to buddy (e.g.
 * very large pages) will be reclaimed faster than average, which seems
 * reasonable.
 *
 * In the current implementation, no more than two zpages may be stored in
 * any pageframe and no zpage ever crosses a pageframe boundary. While
 * other zpage allocation mechanisms may allow greater density, this two
 * zpage-per-pageframe limit both ensures simple reclaim of pageframes
 * (including garbage collection of references to the contents of those
 * pageframes from tmem data structures) AND avoids the need for compaction.
 * With additional complexity, zbud could be modified to support storing
 * up to three zpages per pageframe or, to handle larger average zpages,
 * up to three zpages per pair of pageframes, but it is not clear if the
 * additional complexity would be worth it. So consider it an exercise
 * for future developers.
 *
 * Note also that zbud does no page allocation or freeing. This is so
 * that the caller has complete control over and, for accounting, visibility
 * into if/when pages are allocated and freed.
 *
 * Finally, note that zbud limits the size of zpages it can store; the
 * caller must check the zpage size with zbud_max_buddy_size before
 * storing it, else BUGs will result. Caller beware.
 */
77 | ||
78 | #include <linux/module.h> | |
79 | #include <linux/highmem.h> | |
80 | #include <linux/list.h> | |
81 | #include <linux/spinlock.h> | |
82 | #include <linux/pagemap.h> | |
83 | #include <linux/atomic.h> | |
84 | #include <linux/bug.h> | |
85 | #include "tmem.h" | |
86 | #include "zcache.h" | |
87 | #include "zbud.h" | |
88 | ||
89 | /* | |
90 | * We need to ensure that a struct zbudpage is never larger than a | |
91 | * struct page. This is checked with a BUG_ON in zbud_init. | |
92 | * | |
93 | * The unevictable field indicates that a zbud is being added to the | |
94 | * zbudpage. Since this is a two-phase process (due to tmem locking), | |
95 | * this field locks the zbudpage against eviction when a zbud match | |
96 | * or creation is in process. Since this addition process may occur | |
97 | * in parallel for two zbuds in one zbudpage, the field is a counter | |
98 | * that must not exceed two. | |
99 | */ | |
100 | struct zbudpage { | |
101 | union { | |
102 | struct page page; | |
103 | struct { | |
104 | unsigned long space_for_flags; | |
105 | struct { | |
106 | unsigned zbud0_size:12; | |
107 | unsigned zbud1_size:12; | |
108 | unsigned unevictable:2; | |
109 | }; | |
110 | struct list_head budlist; | |
111 | struct list_head lru; | |
112 | }; | |
113 | }; | |
114 | }; | |
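
/*
 * Illustrative note: because struct zbudpage overlays struct page, the
 * runtime size check done in zbud_init() could in principle be written
 * as a compile-time assertion instead, e.g. (sketch only, not what the
 * code below relies on):
 *
 *	BUILD_BUG_ON(sizeof(struct zbudpage) > sizeof(struct page));
 */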
115 | ||
116 | struct zbudref { | |
117 | union { | |
118 | struct zbudpage *zbudpage; | |
119 | unsigned long zbudref; | |
120 | }; | |
121 | }; | |
122 | ||
123 | #define CHUNK_SHIFT 6 | |
124 | #define CHUNK_SIZE (1 << CHUNK_SHIFT) | |
125 | #define CHUNK_MASK (~(CHUNK_SIZE-1)) | |
126 | #define NCHUNKS (PAGE_SIZE >> CHUNK_SHIFT) | |
127 | #define MAX_CHUNK (NCHUNKS-1) | |
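
/*
 * Worked example (illustrative): with the common PAGE_SIZE of 4096,
 * CHUNK_SIZE is 64 bytes, NCHUNKS is 64, and MAX_CHUNK is 63, so
 * zbud_max_size() below is 63 << 6 = 4032 bytes, one chunk less than
 * a full page.
 */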
128 | ||
129 | /* | |
130 | * The following functions deal with the difference between struct | |
131 | * page and struct zbudpage. Note the hack of using the pageflags | |
132 | * from struct page; this is to avoid duplicating all the complex | |
133 | * pageflag macros. | |
134 | */ | |
135 | static inline void zbudpage_spin_lock(struct zbudpage *zbudpage) | |
136 | { | |
137 | struct page *page = (struct page *)zbudpage; | |
138 | ||
	while (unlikely(test_and_set_bit_lock(PG_locked, &page->flags))) {
		do {
			cpu_relax();
		} while (test_bit(PG_locked, &page->flags));
	}
}

static inline void zbudpage_spin_unlock(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	clear_bit(PG_locked, &page->flags);
}

static inline int zbudpage_spin_trylock(struct zbudpage *zbudpage)
{
	return trylock_page((struct page *)zbudpage);
}

static inline int zbudpage_is_locked(struct zbudpage *zbudpage)
{
	return PageLocked((struct page *)zbudpage);
}

static inline void *kmap_zbudpage_atomic(struct zbudpage *zbudpage)
{
	return kmap_atomic((struct page *)zbudpage);
}

/*
 * A dying zbudpage is an ephemeral page in the process of being evicted.
 * Any data contained in the zbudpage is invalid and we are just waiting for
 * the tmem pampds to be invalidated before freeing the page.
 */
static inline int zbudpage_is_dying(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	return test_bit(PG_reclaim, &page->flags);
}

static inline void zbudpage_set_dying(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	set_bit(PG_reclaim, &page->flags);
}

static inline void zbudpage_clear_dying(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	clear_bit(PG_reclaim, &page->flags);
}

/*
 * A zombie zbudpage is a persistent page in the process of being evicted.
 * The data contained in the zbudpage is valid and we are just waiting for
 * the tmem pampds to be invalidated before freeing the page.
 */
static inline int zbudpage_is_zombie(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	return test_bit(PG_dirty, &page->flags);
}

static inline void zbudpage_set_zombie(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	set_bit(PG_dirty, &page->flags);
}

static inline void zbudpage_clear_zombie(struct zbudpage *zbudpage)
{
	struct page *page = (struct page *)zbudpage;

	clear_bit(PG_dirty, &page->flags);
}

static inline void kunmap_zbudpage_atomic(void *zbpg)
{
	kunmap_atomic(zbpg);
}

/*
 * zbud "translation" and helper functions
 */

static inline struct zbudpage *zbudref_to_zbudpage(struct zbudref *zref)
{
	unsigned long zbud = (unsigned long)zref;
	zbud &= ~1UL;
	return (struct zbudpage *)zbud;
}

static inline struct zbudref *zbudpage_to_zbudref(struct zbudpage *zbudpage,
							unsigned budnum)
{
	unsigned long zbud = (unsigned long)zbudpage;
	BUG_ON(budnum > 1);
	zbud |= budnum;
	return (struct zbudref *)zbud;
}

static inline int zbudref_budnum(struct zbudref *zbudref)
{
	unsigned long zbud = (unsigned long)zbudref;
	return zbud & 1UL;
}
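
/*
 * Round-trip example (illustrative): struct page pointers are at least
 * word-aligned, so the LSB of a zbudref is free to encode the budnum.
 * For a zbudpage at address P, zbudpage_to_zbudref(P, 1) yields P | 1;
 * zbudref_to_zbudpage() masks the bit off to recover P, and
 * zbudref_budnum() reads it back as 1.
 */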
250 | ||
251 | static inline unsigned zbud_max_size(void) | |
252 | { | |
253 | return MAX_CHUNK << CHUNK_SHIFT; | |
254 | } | |
255 | ||
256 | static inline unsigned zbud_size_to_chunks(unsigned size) | |
257 | { | |
258 | BUG_ON(size == 0 || size > zbud_max_size()); | |
259 | return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; | |
260 | } | |
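
/* e.g. a 100-byte zbud rounds up to (100 + 63) >> 6 = 2 chunks */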
261 | ||
262 | /* can only be used between kmap_zbudpage_atomic/kunmap_zbudpage_atomic! */ | |
263 | static inline char *zbud_data(void *zbpg, | |
264 | unsigned budnum, unsigned size) | |
265 | { | |
266 | char *p; | |
267 | ||
268 | BUG_ON(size == 0 || size > zbud_max_size()); | |
269 | p = (char *)zbpg; | |
270 | if (budnum == 1) | |
271 | p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); | |
272 | return p; | |
273 | } | |
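
/*
 * Layout example (illustrative, PAGE_SIZE == 4096): bud 0 always starts
 * at offset 0 and grows upward, while bud 1 is placed flush against the
 * end of the page with its start rounded down to a chunk boundary. For
 * a 200-byte bud 1, 200 rounds up to 256 bytes (4 chunks), so its data
 * begins at offset 4096 - 256 = 3840.
 */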
274 | ||
275 | /* | |
276 | * These are all informative and exposed through debugfs... except for | |
277 | * the arrays... anyone know how to do that? To avoid confusion for | |
278 | * debugfs viewers, some of these should also be atomic_long_t, but | |
279 | * I don't know how to expose atomics via debugfs either... | |
280 | */ | |
281 | static unsigned long zbud_eph_pageframes; | |
282 | static unsigned long zbud_pers_pageframes; | |
283 | static unsigned long zbud_eph_zpages; | |
284 | static unsigned long zbud_pers_zpages; | |
285 | static u64 zbud_eph_zbytes; | |
286 | static u64 zbud_pers_zbytes; | |
287 | static unsigned long zbud_eph_evicted_pageframes; | |
288 | static unsigned long zbud_pers_evicted_pageframes; | |
289 | static unsigned long zbud_eph_cumul_zpages; | |
290 | static unsigned long zbud_pers_cumul_zpages; | |
291 | static u64 zbud_eph_cumul_zbytes; | |
292 | static u64 zbud_pers_cumul_zbytes; | |
293 | static unsigned long zbud_eph_cumul_chunk_counts[NCHUNKS]; | |
294 | static unsigned long zbud_pers_cumul_chunk_counts[NCHUNKS]; | |
295 | static unsigned long zbud_eph_buddied_count; | |
296 | static unsigned long zbud_pers_buddied_count; | |
297 | static unsigned long zbud_eph_unbuddied_count; | |
298 | static unsigned long zbud_pers_unbuddied_count; | |
299 | static unsigned long zbud_eph_zombie_count; | |
300 | static unsigned long zbud_pers_zombie_count; | |
301 | static atomic_t zbud_eph_zombie_atomic; | |
302 | static atomic_t zbud_pers_zombie_atomic; | |
303 | ||
304 | #ifdef CONFIG_DEBUG_FS | |
305 | #include <linux/debugfs.h> | |
306 | #define zdfs debugfs_create_size_t | |
307 | #define zdfs64 debugfs_create_u64 | |
308 | static int zbud_debugfs_init(void) | |
309 | { | |
310 | struct dentry *root = debugfs_create_dir("zbud", NULL); | |
311 | if (root == NULL) | |
312 | return -ENXIO; | |
313 | ||
314 | /* | |
315 | * would be nice to dump the sizes of the unbuddied | |
316 | * arrays, like was done with sysfs, but it doesn't | |
317 | * look like debugfs is flexible enough to do that | |
318 | */ | |
319 | zdfs64("eph_zbytes", S_IRUGO, root, &zbud_eph_zbytes); | |
320 | zdfs64("eph_cumul_zbytes", S_IRUGO, root, &zbud_eph_cumul_zbytes); | |
321 | zdfs64("pers_zbytes", S_IRUGO, root, &zbud_pers_zbytes); | |
322 | zdfs64("pers_cumul_zbytes", S_IRUGO, root, &zbud_pers_cumul_zbytes); | |
323 | zdfs("eph_cumul_zpages", S_IRUGO, root, &zbud_eph_cumul_zpages); | |
324 | zdfs("eph_evicted_pageframes", S_IRUGO, root, | |
325 | &zbud_eph_evicted_pageframes); | |
326 | zdfs("eph_zpages", S_IRUGO, root, &zbud_eph_zpages); | |
327 | zdfs("eph_pageframes", S_IRUGO, root, &zbud_eph_pageframes); | |
328 | zdfs("eph_buddied_count", S_IRUGO, root, &zbud_eph_buddied_count); | |
329 | zdfs("eph_unbuddied_count", S_IRUGO, root, &zbud_eph_unbuddied_count); | |
330 | zdfs("pers_cumul_zpages", S_IRUGO, root, &zbud_pers_cumul_zpages); | |
331 | zdfs("pers_evicted_pageframes", S_IRUGO, root, | |
332 | &zbud_pers_evicted_pageframes); | |
333 | zdfs("pers_zpages", S_IRUGO, root, &zbud_pers_zpages); | |
334 | zdfs("pers_pageframes", S_IRUGO, root, &zbud_pers_pageframes); | |
335 | zdfs("pers_buddied_count", S_IRUGO, root, &zbud_pers_buddied_count); | |
336 | zdfs("pers_unbuddied_count", S_IRUGO, root, &zbud_pers_unbuddied_count); | |
337 | zdfs("pers_zombie_count", S_IRUGO, root, &zbud_pers_zombie_count); | |
338 | return 0; | |
339 | } | |
340 | #undef zdfs | |
341 | #undef zdfs64 | |
342 | #endif | |
343 | ||
344 | /* protects the buddied list and all unbuddied lists */ | |
345 | static DEFINE_SPINLOCK(zbud_eph_lists_lock); | |
346 | static DEFINE_SPINLOCK(zbud_pers_lists_lock); | |
347 | ||
348 | struct zbud_unbuddied { | |
349 | struct list_head list; | |
350 | unsigned count; | |
351 | }; | |
352 | ||
353 | /* list N contains pages with N chunks USED and NCHUNKS-N unused */ | |
354 | /* element 0 is never used but optimizing that isn't worth it */ | |
355 | static struct zbud_unbuddied zbud_eph_unbuddied[NCHUNKS]; | |
356 | static struct zbud_unbuddied zbud_pers_unbuddied[NCHUNKS]; | |
357 | static LIST_HEAD(zbud_eph_lru_list); | |
358 | static LIST_HEAD(zbud_pers_lru_list); | |
359 | static LIST_HEAD(zbud_eph_buddied_list); | |
360 | static LIST_HEAD(zbud_pers_buddied_list); | |
361 | static LIST_HEAD(zbud_eph_zombie_list); | |
362 | static LIST_HEAD(zbud_pers_zombie_list); | |
363 | ||
364 | /* | |
365 | * Given a struct page, transform it to a zbudpage so that it can be | |
366 | * used by zbud and initialize fields as necessary. | |
367 | */ | |
368 | static inline struct zbudpage *zbud_init_zbudpage(struct page *page, bool eph) | |
369 | { | |
370 | struct zbudpage *zbudpage = (struct zbudpage *)page; | |
371 | ||
372 | BUG_ON(page == NULL); | |
373 | INIT_LIST_HEAD(&zbudpage->budlist); | |
374 | INIT_LIST_HEAD(&zbudpage->lru); | |
375 | zbudpage->zbud0_size = 0; | |
376 | zbudpage->zbud1_size = 0; | |
377 | zbudpage->unevictable = 0; | |
378 | if (eph) | |
379 | zbud_eph_pageframes++; | |
380 | else | |
381 | zbud_pers_pageframes++; | |
382 | return zbudpage; | |
383 | } | |
384 | ||
385 | /* "Transform" a zbudpage back to a struct page suitable to free. */ | |
386 | static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage, | |
387 | bool eph) | |
388 | { | |
389 | struct page *page = (struct page *)zbudpage; | |
390 | ||
391 | BUG_ON(!list_empty(&zbudpage->budlist)); | |
392 | BUG_ON(!list_empty(&zbudpage->lru)); | |
393 | BUG_ON(zbudpage->zbud0_size != 0); | |
394 | BUG_ON(zbudpage->zbud1_size != 0); | |
395 | BUG_ON(!PageLocked(page)); | |
396 | BUG_ON(zbudpage->unevictable != 0); | |
397 | BUG_ON(zbudpage_is_dying(zbudpage)); | |
398 | BUG_ON(zbudpage_is_zombie(zbudpage)); | |
399 | if (eph) | |
400 | zbud_eph_pageframes--; | |
401 | else | |
402 | zbud_pers_pageframes--; | |
403 | zbudpage_spin_unlock(zbudpage); | |
404 | reset_page_mapcount(page); | |
405 | init_page_count(page); | |
406 | page->index = 0; | |
407 | return page; | |
408 | } | |
409 | ||
410 | /* Mark a zbud as unused and do accounting */ | |
411 | static inline void zbud_unuse_zbud(struct zbudpage *zbudpage, | |
412 | int budnum, bool eph) | |
413 | { | |
414 | unsigned size; | |
415 | ||
416 | BUG_ON(!zbudpage_is_locked(zbudpage)); | |
417 | if (budnum == 0) { | |
418 | size = zbudpage->zbud0_size; | |
419 | zbudpage->zbud0_size = 0; | |
420 | } else { | |
421 | size = zbudpage->zbud1_size; | |
422 | zbudpage->zbud1_size = 0; | |
423 | } | |
424 | if (eph) { | |
425 | zbud_eph_zbytes -= size; | |
426 | zbud_eph_zpages--; | |
427 | } else { | |
428 | zbud_pers_zbytes -= size; | |
429 | zbud_pers_zpages--; | |
430 | } | |
431 | } | |
432 | ||
433 | /* | |
434 | * Given a zbudpage/budnum/size, a tmem handle, and a kmapped pointer | |
435 | * to some data, set up the zbud appropriately including data copying | |
436 | * and accounting. Note that if cdata is NULL, the data copying is | |
437 | * skipped. (This is useful for lazy writes such as for RAMster.) | |
438 | */ | |
439 | static void zbud_init_zbud(struct zbudpage *zbudpage, struct tmem_handle *th, | |
440 | bool eph, void *cdata, | |
441 | unsigned budnum, unsigned size) | |
442 | { | |
443 | char *to; | |
444 | void *zbpg; | |
445 | struct tmem_handle *to_th; | |
446 | unsigned nchunks = zbud_size_to_chunks(size); | |
447 | ||
448 | BUG_ON(!zbudpage_is_locked(zbudpage)); | |
449 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
450 | to = zbud_data(zbpg, budnum, size); | |
451 | to_th = (struct tmem_handle *)to; | |
452 | to_th->index = th->index; | |
453 | to_th->oid = th->oid; | |
454 | to_th->pool_id = th->pool_id; | |
455 | to_th->client_id = th->client_id; | |
456 | to += sizeof(struct tmem_handle); | |
457 | if (cdata != NULL) | |
458 | memcpy(to, cdata, size - sizeof(struct tmem_handle)); | |
459 | kunmap_zbudpage_atomic(zbpg); | |
460 | if (budnum == 0) | |
461 | zbudpage->zbud0_size = size; | |
462 | else | |
463 | zbudpage->zbud1_size = size; | |
464 | if (eph) { | |
465 | zbud_eph_cumul_chunk_counts[nchunks]++; | |
466 | zbud_eph_zpages++; | |
467 | zbud_eph_cumul_zpages++; | |
468 | zbud_eph_zbytes += size; | |
469 | zbud_eph_cumul_zbytes += size; | |
470 | } else { | |
471 | zbud_pers_cumul_chunk_counts[nchunks]++; | |
472 | zbud_pers_zpages++; | |
473 | zbud_pers_cumul_zpages++; | |
474 | zbud_pers_zbytes += size; | |
475 | zbud_pers_cumul_zbytes += size; | |
476 | } | |
477 | } | |
478 | ||
479 | /* | |
480 | * Given a locked dying zbudpage, read out the tmem handles from the data, | |
481 | * unlock the page, then use the handles to tell tmem to flush out its | |
482 | * references | |
483 | */ | |
484 | static void zbud_evict_tmem(struct zbudpage *zbudpage) | |
485 | { | |
486 | int i, j; | |
487 | uint32_t pool_id[2], client_id[2]; | |
488 | uint32_t index[2]; | |
489 | struct tmem_oid oid[2]; | |
490 | struct tmem_pool *pool; | |
491 | void *zbpg; | |
492 | struct tmem_handle *th; | |
493 | unsigned size; | |
494 | ||
495 | /* read out the tmem handles from the data and set aside */ | |
496 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
497 | for (i = 0, j = 0; i < 2; i++) { | |
498 | size = (i == 0) ? zbudpage->zbud0_size : zbudpage->zbud1_size; | |
499 | if (size) { | |
500 | th = (struct tmem_handle *)zbud_data(zbpg, i, size); | |
501 | client_id[j] = th->client_id; | |
502 | pool_id[j] = th->pool_id; | |
503 | oid[j] = th->oid; | |
504 | index[j] = th->index; | |
505 | j++; | |
506 | zbud_unuse_zbud(zbudpage, i, true); | |
507 | } | |
508 | } | |
509 | kunmap_zbudpage_atomic(zbpg); | |
510 | zbudpage_spin_unlock(zbudpage); | |
511 | /* zbudpage is now an unlocked dying... tell tmem to flush pointers */ | |
512 | for (i = 0; i < j; i++) { | |
513 | pool = zcache_get_pool_by_id(client_id[i], pool_id[i]); | |
514 | if (pool != NULL) { | |
515 | tmem_flush_page(pool, &oid[i], index[i]); | |
516 | zcache_put_pool(pool); | |
517 | } | |
518 | } | |
519 | } | |
520 | ||
521 | /* | |
522 | * Externally callable zbud handling routines. | |
523 | */ | |
524 | ||
/*
 * Return the maximum size compressed page that can be stored (secretly
 * setting aside space for the tmem handle).
 */
unsigned int zbud_max_buddy_size(void)
{
	return zbud_max_size() - sizeof(struct tmem_handle);
}
533 | ||
534 | /* | |
535 | * Given a zbud reference, free the corresponding zbud from all lists, | |
536 | * mark it as unused, do accounting, and if the freeing of the zbud | |
537 | * frees up an entire pageframe, return it to the caller (else NULL). | |
538 | */ | |
539 | struct page *zbud_free_and_delist(struct zbudref *zref, bool eph, | |
540 | unsigned int *zsize, unsigned int *zpages) | |
541 | { | |
542 | unsigned long budnum = zbudref_budnum(zref); | |
543 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
544 | struct page *page = NULL; | |
545 | unsigned chunks, bud_size, other_bud_size; | |
546 | spinlock_t *lists_lock = | |
547 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
548 | struct zbud_unbuddied *unbud = | |
549 | eph ? zbud_eph_unbuddied : zbud_pers_unbuddied; | |
550 | ||
551 | ||
552 | spin_lock(lists_lock); | |
553 | zbudpage_spin_lock(zbudpage); | |
554 | if (zbudpage_is_dying(zbudpage)) { | |
555 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
556 | zbudpage_spin_unlock(zbudpage); | |
557 | spin_unlock(lists_lock); | |
558 | *zpages = 0; | |
559 | *zsize = 0; | |
560 | goto out; | |
561 | } | |
562 | if (budnum == 0) { | |
563 | bud_size = zbudpage->zbud0_size; | |
564 | other_bud_size = zbudpage->zbud1_size; | |
565 | } else { | |
566 | bud_size = zbudpage->zbud1_size; | |
567 | other_bud_size = zbudpage->zbud0_size; | |
568 | } | |
569 | *zsize = bud_size - sizeof(struct tmem_handle); | |
570 | *zpages = 1; | |
571 | zbud_unuse_zbud(zbudpage, budnum, eph); | |
	if (other_bud_size == 0) { /* was unbuddied: unlist and free */
		chunks = zbud_size_to_chunks(bud_size);
		if (zbudpage_is_zombie(zbudpage)) {
			if (eph)
				zbud_eph_zombie_count =
				  atomic_dec_return(&zbud_eph_zombie_atomic);
			else
				zbud_pers_zombie_count =
				  atomic_dec_return(&zbud_pers_zombie_atomic);
			zbudpage_clear_zombie(zbudpage);
		} else {
			BUG_ON(list_empty(&unbud[chunks].list));
			list_del_init(&zbudpage->budlist);
			unbud[chunks].count--;
		}
		list_del_init(&zbudpage->lru);
		spin_unlock(lists_lock);
		if (eph)
			zbud_eph_unbuddied_count--;
		else
			zbud_pers_unbuddied_count--;
		page = zbud_unuse_zbudpage(zbudpage, eph);
	} else { /* was buddied: move remaining buddy to unbuddied list */
		chunks = zbud_size_to_chunks(other_bud_size);
		if (!zbudpage_is_zombie(zbudpage)) {
			list_del_init(&zbudpage->budlist);
			list_add_tail(&zbudpage->budlist, &unbud[chunks].list);
			unbud[chunks].count++;
		}
		if (eph) {
			zbud_eph_buddied_count--;
			zbud_eph_unbuddied_count++;
		} else {
			zbud_pers_unbuddied_count++;
			zbud_pers_buddied_count--;
		}
		/* don't mess with lru, no need to move it */
		zbudpage_spin_unlock(zbudpage);
		spin_unlock(lists_lock);
	}
out:
	return page;
}

/*
 * Given a tmem handle, and a kmapped pointer to compressed data of
 * the given size, try to find an unbuddied zbudpage in which to
 * create a zbud. If found, put it there, mark the zbudpage unevictable,
 * and return a zbudref to it. Else return NULL.
 */
struct zbudref *zbud_match_prep(struct tmem_handle *th, bool eph,
				void *cdata, unsigned size)
{
	struct zbudpage *zbudpage = NULL, *zbudpage2;
	unsigned long budnum = 0UL;
	unsigned nchunks;
	int i, found_good_buddy = 0;
	spinlock_t *lists_lock =
		eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
	struct zbud_unbuddied *unbud =
		eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;

	size += sizeof(struct tmem_handle);
	nchunks = zbud_size_to_chunks(size);
	for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
		spin_lock(lists_lock);
		if (!list_empty(&unbud[i].list)) {
			list_for_each_entry_safe(zbudpage, zbudpage2,
						 &unbud[i].list, budlist) {
				if (zbudpage_spin_trylock(zbudpage)) {
					found_good_buddy = i;
					goto found_unbuddied;
				}
			}
		}
		spin_unlock(lists_lock);
	}
	zbudpage = NULL;
	goto out;

found_unbuddied:
	BUG_ON(!zbudpage_is_locked(zbudpage));
	BUG_ON(!((zbudpage->zbud0_size == 0) ^ (zbudpage->zbud1_size == 0)));
	if (zbudpage->zbud0_size == 0)
		budnum = 0UL;
	else if (zbudpage->zbud1_size == 0)
		budnum = 1UL;
	list_del_init(&zbudpage->budlist);
	if (eph) {
		list_add_tail(&zbudpage->budlist, &zbud_eph_buddied_list);
		unbud[found_good_buddy].count--;
		zbud_eph_unbuddied_count--;
		zbud_eph_buddied_count++;
		/* "promote" raw zbudpage to most-recently-used */
		list_del_init(&zbudpage->lru);
		list_add_tail(&zbudpage->lru, &zbud_eph_lru_list);
	} else {
		list_add_tail(&zbudpage->budlist, &zbud_pers_buddied_list);
		unbud[found_good_buddy].count--;
		zbud_pers_unbuddied_count--;
		zbud_pers_buddied_count++;
		/* "promote" raw zbudpage to most-recently-used */
		list_del_init(&zbudpage->lru);
		list_add_tail(&zbudpage->lru, &zbud_pers_lru_list);
	}
	zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size);
	zbudpage->unevictable++;
	BUG_ON(zbudpage->unevictable == 3);
	zbudpage_spin_unlock(zbudpage);
	spin_unlock(lists_lock);
out:
	return zbudpage_to_zbudref(zbudpage, budnum);
}

/*
 * Given a tmem handle, and a kmapped pointer to compressed data of
 * the given size, and a newly allocated struct page, create an unevictable
 * zbud in that new page and return a zbudref to it.
 */
struct zbudref *zbud_create_prep(struct tmem_handle *th, bool eph,
					void *cdata, unsigned size,
					struct page *newpage)
{
	struct zbudpage *zbudpage;
	unsigned long budnum = 0;
	unsigned nchunks;
	spinlock_t *lists_lock =
		eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
	struct zbud_unbuddied *unbud =
		eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;

#if 0
	/* this may be worth it later to support decompress-in-place? */
	static unsigned long counter;
	budnum = counter++ & 1;	/* alternate using zbud0 and zbud1 */
#endif

	if (size > zbud_max_buddy_size())
		return NULL;
	if (newpage == NULL)
		return NULL;

	size += sizeof(struct tmem_handle);
	nchunks = zbud_size_to_chunks(size);
	spin_lock(lists_lock);
	zbudpage = zbud_init_zbudpage(newpage, eph);
	zbudpage_spin_lock(zbudpage);
	list_add_tail(&zbudpage->budlist, &unbud[nchunks].list);
	if (eph) {
		list_add_tail(&zbudpage->lru, &zbud_eph_lru_list);
		zbud_eph_unbuddied_count++;
	} else {
		list_add_tail(&zbudpage->lru, &zbud_pers_lru_list);
		zbud_pers_unbuddied_count++;
	}
	unbud[nchunks].count++;
	zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size);
	zbudpage->unevictable++;
	BUG_ON(zbudpage->unevictable == 3);
	zbudpage_spin_unlock(zbudpage);
	spin_unlock(lists_lock);
	return zbudpage_to_zbudref(zbudpage, budnum);
}

/*
 * Finish creation of a zbud by, assuming another zbud isn't being created
 * in parallel, marking it evictable.
 */
void zbud_create_finish(struct zbudref *zref, bool eph)
{
	struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
	spinlock_t *lists_lock =
		eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;

	spin_lock(lists_lock);
	zbudpage_spin_lock(zbudpage);
	BUG_ON(zbudpage_is_dying(zbudpage));
	zbudpage->unevictable--;
	BUG_ON((int)zbudpage->unevictable < 0);
	zbudpage_spin_unlock(zbudpage);
	spin_unlock(lists_lock);
}
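
/*
 * Illustrative only: a minimal sketch (under #if 0, not compiled) of the
 * match/create/finish protocol a caller such as zcache would follow. The
 * helper name, the GFP flags, and the assumption that the caller has
 * already compressed the data into cdata/clen and filled in th are all
 * hypothetical, not part of zbud itself.
 */
#if 0
static struct zbudref *example_store_zpage(struct tmem_handle *th, bool eph,
					   void *cdata, unsigned clen)
{
	struct zbudref *zref;
	struct page *newpage;

	/* zbud BUGs on oversized zpages, so the caller must check first */
	if (clen > zbud_max_buddy_size())
		return NULL;
	/* first try to buddy up with an existing unbuddied zbudpage */
	zref = zbud_match_prep(th, eph, cdata, clen);
	if (zref == NULL) {
		/* no match: zbud never allocates, so the caller must */
		newpage = alloc_page(GFP_ATOMIC);
		zref = zbud_create_prep(th, eph, cdata, clen, newpage);
		if (zref == NULL && newpage != NULL)
			__free_page(newpage);
	}
	if (zref != NULL)
		zbud_create_finish(zref, eph);	/* clears unevictable */
	return zref;
}
#endif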
755 | ||
756 | /* | |
757 | * Given a zbudref and a struct page, decompress the data from | |
758 | * the zbud into the physical page represented by the struct page | |
759 | * by upcalling to zcache_decompress | |
760 | */ | |
761 | int zbud_decompress(struct page *data_page, struct zbudref *zref, bool eph, | |
762 | void (*decompress)(char *, unsigned int, char *)) | |
763 | { | |
764 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
765 | unsigned long budnum = zbudref_budnum(zref); | |
766 | void *zbpg; | |
767 | char *to_va, *from_va; | |
768 | unsigned size; | |
769 | int ret = -1; | |
770 | spinlock_t *lists_lock = | |
771 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
772 | ||
773 | spin_lock(lists_lock); | |
774 | zbudpage_spin_lock(zbudpage); | |
775 | if (zbudpage_is_dying(zbudpage)) { | |
776 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
777 | goto out; | |
778 | } | |
779 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
780 | to_va = kmap_atomic(data_page); | |
781 | if (budnum == 0) | |
782 | size = zbudpage->zbud0_size; | |
783 | else | |
784 | size = zbudpage->zbud1_size; | |
785 | BUG_ON(size == 0 || size > zbud_max_size()); | |
786 | from_va = zbud_data(zbpg, budnum, size); | |
787 | from_va += sizeof(struct tmem_handle); | |
788 | size -= sizeof(struct tmem_handle); | |
789 | decompress(from_va, size, to_va); | |
790 | kunmap_atomic(to_va); | |
791 | kunmap_zbudpage_atomic(zbpg); | |
792 | ret = 0; | |
793 | out: | |
794 | zbudpage_spin_unlock(zbudpage); | |
795 | spin_unlock(lists_lock); | |
796 | return ret; | |
797 | } | |
798 | ||
799 | /* | |
800 | * Given a zbudref and a kernel pointer, copy the data from | |
801 | * the zbud to the kernel pointer. | |
802 | */ | |
803 | int zbud_copy_from_zbud(char *to_va, struct zbudref *zref, | |
804 | size_t *sizep, bool eph) | |
805 | { | |
806 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
807 | unsigned long budnum = zbudref_budnum(zref); | |
808 | void *zbpg; | |
809 | char *from_va; | |
810 | unsigned size; | |
811 | int ret = -1; | |
812 | spinlock_t *lists_lock = | |
813 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
814 | ||
815 | spin_lock(lists_lock); | |
816 | zbudpage_spin_lock(zbudpage); | |
817 | if (zbudpage_is_dying(zbudpage)) { | |
818 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
819 | goto out; | |
820 | } | |
821 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
822 | if (budnum == 0) | |
823 | size = zbudpage->zbud0_size; | |
824 | else | |
825 | size = zbudpage->zbud1_size; | |
826 | BUG_ON(size == 0 || size > zbud_max_size()); | |
827 | from_va = zbud_data(zbpg, budnum, size); | |
828 | from_va += sizeof(struct tmem_handle); | |
829 | size -= sizeof(struct tmem_handle); | |
830 | *sizep = size; | |
831 | memcpy(to_va, from_va, size); | |
832 | ||
833 | kunmap_zbudpage_atomic(zbpg); | |
834 | ret = 0; | |
835 | out: | |
836 | zbudpage_spin_unlock(zbudpage); | |
837 | spin_unlock(lists_lock); | |
838 | return ret; | |
839 | } | |
840 | ||
841 | /* | |
842 | * Given a zbudref and a kernel pointer, copy the data from | |
843 | * the kernel pointer to the zbud. | |
844 | */ | |
845 | int zbud_copy_to_zbud(struct zbudref *zref, char *from_va, bool eph) | |
846 | { | |
847 | struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); | |
848 | unsigned long budnum = zbudref_budnum(zref); | |
849 | void *zbpg; | |
850 | char *to_va; | |
851 | unsigned size; | |
852 | int ret = -1; | |
853 | spinlock_t *lists_lock = | |
854 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
855 | ||
856 | spin_lock(lists_lock); | |
857 | zbudpage_spin_lock(zbudpage); | |
858 | if (zbudpage_is_dying(zbudpage)) { | |
859 | /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ | |
860 | goto out; | |
861 | } | |
862 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
863 | if (budnum == 0) | |
864 | size = zbudpage->zbud0_size; | |
865 | else | |
866 | size = zbudpage->zbud1_size; | |
867 | BUG_ON(size == 0 || size > zbud_max_size()); | |
868 | to_va = zbud_data(zbpg, budnum, size); | |
869 | to_va += sizeof(struct tmem_handle); | |
870 | size -= sizeof(struct tmem_handle); | |
871 | memcpy(to_va, from_va, size); | |
872 | ||
873 | kunmap_zbudpage_atomic(zbpg); | |
874 | ret = 0; | |
875 | out: | |
876 | zbudpage_spin_unlock(zbudpage); | |
877 | spin_unlock(lists_lock); | |
878 | return ret; | |
879 | } | |
880 | ||
881 | /* | |
882 | * Choose an ephemeral LRU zbudpage that is evictable (not locked), ensure | |
883 | * there are no references to it remaining, and return the now unused | |
884 | * (and re-init'ed) struct page and the total amount of compressed | |
885 | * data that was evicted. | |
886 | */ | |
887 | struct page *zbud_evict_pageframe_lru(unsigned int *zsize, unsigned int *zpages) | |
888 | { | |
889 | struct zbudpage *zbudpage = NULL, *zbudpage2; | |
890 | struct zbud_unbuddied *unbud = zbud_eph_unbuddied; | |
891 | struct page *page = NULL; | |
892 | bool irqs_disabled = irqs_disabled(); | |
893 | ||
894 | /* | |
895 | * Since this can be called indirectly from cleancache_put, which | |
896 | * has interrupts disabled, as well as frontswap_put, which does not, | |
897 | * we need to be able to handle both cases, even though it is ugly. | |
898 | */ | |
899 | if (irqs_disabled) | |
900 | spin_lock(&zbud_eph_lists_lock); | |
901 | else | |
902 | spin_lock_bh(&zbud_eph_lists_lock); | |
903 | *zsize = 0; | |
904 | if (list_empty(&zbud_eph_lru_list)) | |
905 | goto unlock_out; | |
906 | list_for_each_entry_safe(zbudpage, zbudpage2, &zbud_eph_lru_list, lru) { | |
907 | /* skip a locked zbudpage */ | |
908 | if (unlikely(!zbudpage_spin_trylock(zbudpage))) | |
909 | continue; | |
910 | /* skip an unevictable zbudpage */ | |
911 | if (unlikely(zbudpage->unevictable != 0)) { | |
912 | zbudpage_spin_unlock(zbudpage); | |
913 | continue; | |
914 | } | |
915 | /* got a locked evictable page */ | |
916 | goto evict_page; | |
917 | ||
918 | } | |
919 | unlock_out: | |
920 | /* no unlocked evictable pages, give up */ | |
921 | if (irqs_disabled) | |
922 | spin_unlock(&zbud_eph_lists_lock); | |
923 | else | |
924 | spin_unlock_bh(&zbud_eph_lists_lock); | |
925 | goto out; | |
926 | ||
927 | evict_page: | |
928 | list_del_init(&zbudpage->budlist); | |
929 | list_del_init(&zbudpage->lru); | |
930 | zbudpage_set_dying(zbudpage); | |
931 | /* | |
932 | * the zbudpage is now "dying" and attempts to read, write, | |
933 | * or delete data from it will be ignored | |
934 | */ | |
935 | if (zbudpage->zbud0_size != 0 && zbudpage->zbud1_size != 0) { | |
936 | *zsize = zbudpage->zbud0_size + zbudpage->zbud1_size - | |
937 | (2 * sizeof(struct tmem_handle)); | |
938 | *zpages = 2; | |
939 | } else if (zbudpage->zbud0_size != 0) { | |
940 | unbud[zbud_size_to_chunks(zbudpage->zbud0_size)].count--; | |
941 | *zsize = zbudpage->zbud0_size - sizeof(struct tmem_handle); | |
942 | *zpages = 1; | |
943 | } else if (zbudpage->zbud1_size != 0) { | |
944 | unbud[zbud_size_to_chunks(zbudpage->zbud1_size)].count--; | |
945 | *zsize = zbudpage->zbud1_size - sizeof(struct tmem_handle); | |
946 | *zpages = 1; | |
947 | } else { | |
948 | BUG(); | |
949 | } | |
950 | spin_unlock(&zbud_eph_lists_lock); | |
951 | zbud_eph_evicted_pageframes++; | |
952 | if (*zpages == 1) | |
953 | zbud_eph_unbuddied_count--; | |
954 | else | |
955 | zbud_eph_buddied_count--; | |
956 | zbud_evict_tmem(zbudpage); | |
957 | zbudpage_spin_lock(zbudpage); | |
958 | zbudpage_clear_dying(zbudpage); | |
959 | page = zbud_unuse_zbudpage(zbudpage, true); | |
960 | if (!irqs_disabled) | |
961 | local_bh_enable(); | |
962 | out: | |
963 | return page; | |
964 | } | |
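
/*
 * Illustrative only (under #if 0, not compiled): a hypothetical caller-side
 * reclaim step. zbud hands back the emptied pageframe but never frees it,
 * so the caller decides what to do with the page; the helper name and the
 * choice to return the page to the buddy allocator are assumptions.
 */
#if 0
static void example_reclaim_one_pageframe(void)
{
	unsigned int zsize, zpages;
	struct page *page;

	page = zbud_evict_pageframe_lru(&zsize, &zpages);
	if (page != NULL)
		__free_page(page);	/* evicted zpages totaled zsize bytes */
}
#endif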
965 | ||
966 | /* | |
967 | * Choose a persistent LRU zbudpage that is evictable (not locked), zombify it, | |
968 | * read the tmem_handle(s) out of it into the passed array, and return the | |
969 | * number of zbuds. Caller must perform necessary tmem functions and, | |
970 | * indirectly, zbud functions to fetch any valid data and cause the | |
971 | * now-zombified zbudpage to eventually be freed. We track the zombified | |
972 | * zbudpage count so it is possible to observe if there is a leak. | |
973 | FIXME: describe (ramster) case where data pointers are passed in for memcpy | |
974 | */ | |
975 | unsigned int zbud_make_zombie_lru(struct tmem_handle *th, unsigned char **data, | |
976 | unsigned int *zsize, bool eph) | |
977 | { | |
978 | struct zbudpage *zbudpage = NULL, *zbudpag2; | |
979 | struct tmem_handle *thfrom; | |
980 | char *from_va; | |
981 | void *zbpg; | |
982 | unsigned size; | |
983 | int ret = 0, i; | |
984 | spinlock_t *lists_lock = | |
985 | eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; | |
986 | struct list_head *lru_list = | |
987 | eph ? &zbud_eph_lru_list : &zbud_pers_lru_list; | |
988 | ||
989 | spin_lock_bh(lists_lock); | |
990 | if (list_empty(lru_list)) | |
991 | goto out; | |
992 | list_for_each_entry_safe(zbudpage, zbudpag2, lru_list, lru) { | |
993 | /* skip a locked zbudpage */ | |
994 | if (unlikely(!zbudpage_spin_trylock(zbudpage))) | |
995 | continue; | |
996 | /* skip an unevictable zbudpage */ | |
997 | if (unlikely(zbudpage->unevictable != 0)) { | |
998 | zbudpage_spin_unlock(zbudpage); | |
999 | continue; | |
1000 | } | |
1001 | /* got a locked evictable page */ | |
1002 | goto zombify_page; | |
1003 | } | |
1004 | /* no unlocked evictable pages, give up */ | |
1005 | goto out; | |
1006 | ||
1007 | zombify_page: | |
1008 | /* got an unlocked evictable page, zombify it */ | |
1009 | list_del_init(&zbudpage->budlist); | |
1010 | zbudpage_set_zombie(zbudpage); | |
1011 | /* FIXME what accounting do I need to do here? */ | |
1012 | list_del_init(&zbudpage->lru); | |
1013 | if (eph) { | |
1014 | list_add_tail(&zbudpage->lru, &zbud_eph_zombie_list); | |
1015 | zbud_eph_zombie_count = | |
1016 | atomic_inc_return(&zbud_eph_zombie_atomic); | |
1017 | } else { | |
1018 | list_add_tail(&zbudpage->lru, &zbud_pers_zombie_list); | |
1019 | zbud_pers_zombie_count = | |
1020 | atomic_inc_return(&zbud_pers_zombie_atomic); | |
1021 | } | |
1022 | /* FIXME what accounting do I need to do here? */ | |
1023 | zbpg = kmap_zbudpage_atomic(zbudpage); | |
1024 | for (i = 0; i < 2; i++) { | |
1025 | size = (i == 0) ? zbudpage->zbud0_size : zbudpage->zbud1_size; | |
1026 | if (size) { | |
1027 | from_va = zbud_data(zbpg, i, size); | |
1028 | thfrom = (struct tmem_handle *)from_va; | |
1029 | from_va += sizeof(struct tmem_handle); | |
1030 | size -= sizeof(struct tmem_handle); | |
1031 | if (th != NULL) | |
1032 | th[ret] = *thfrom; | |
1033 | if (data != NULL) | |
1034 | memcpy(data[ret], from_va, size); | |
1035 | if (zsize != NULL) | |
1036 | *zsize++ = size; | |
1037 | ret++; | |
1038 | } | |
1039 | } | |
1040 | kunmap_zbudpage_atomic(zbpg); | |
1041 | zbudpage_spin_unlock(zbudpage); | |
1042 | out: | |
1043 | spin_unlock_bh(lists_lock); | |
1044 | return ret; | |
1045 | } | |
1046 | ||
1047 | void __init zbud_init(void) | |
1048 | { | |
1049 | int i; | |
1050 | ||
1051 | #ifdef CONFIG_DEBUG_FS | |
1052 | zbud_debugfs_init(); | |
1053 | #endif | |
1054 | BUG_ON((sizeof(struct tmem_handle) * 2 > CHUNK_SIZE)); | |
1055 | BUG_ON(sizeof(struct zbudpage) > sizeof(struct page)); | |
1056 | for (i = 0; i < NCHUNKS; i++) { | |
1057 | INIT_LIST_HEAD(&zbud_eph_unbuddied[i].list); | |
1058 | INIT_LIST_HEAD(&zbud_pers_unbuddied[i].list); | |
1059 | } | |
1060 | } |