Commit | Line | Data |
---|---|---|
a50777c7 DM |
1 | /****************************************************************************** |
2 | * Xen selfballoon driver (and optional frontswap self-shrinking driver) | |
3 | * | |
4 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. | |
5 | * | |
6 | * This code complements the cleancache and frontswap patchsets to optimize | |
7 | * support for Xen Transcendent Memory ("tmem"). The policy it implements | |
8 | * is rudimentary and will likely improve over time, but it does work well | |
9 | * enough today. | |
10 | * | |
11 | * Two functionalities are implemented here which both use "control theory" | |
12 | * (feedback) to optimize memory utilization. In a virtualized environment | |
13 | * such as Xen, RAM is often a scarce resource and we would like to ensure | |
14 | * that each of a possibly large number of virtual machines is using RAM | |
15 | * efficiently, i.e. using as little as possible when under light load | |
16 | * and obtaining as much as possible when memory demands are high. | |
17 | * Since RAM needs vary highly dynamically and sometimes dramatically, | |
18 | * "hysteresis" is used, that is, memory target is determined not just | |
19 | * on current data but also on past data stored in the system. | |
20 | * | |
21 | * "Selfballooning" creates memory pressure by managing the Xen balloon | |
22 | * driver to decrease and increase available kernel memory, driven | |
23 | * largely by the target value of "Committed_AS" (see /proc/meminfo). | |
24 | * Since Committed_AS does not account for clean mapped pages (i.e. pages | |
25 | * in RAM that are identical to pages on disk), selfballooning has the | |
26 | * effect of pushing less frequently used clean pagecache pages out of | |
27 | * kernel RAM and, presumably using cleancache, into Xen tmem where | |
28 | * Xen can more efficiently optimize RAM utilization for such pages. | |
29 | * | |
30 | * When kernel memory demand unexpectedly increases faster than Xen, via | |
31 | * the selfballoon driver, is able to (or chooses to) provide usable RAM, | |
32 | * the kernel may invoke swapping. In most cases, frontswap is able | |
33 | * to absorb this swapping into Xen tmem. However, due to the fact | |
34 | * that the kernel swap subsystem assumes swapping occurs to a disk, | |
35 | * swapped pages may sit on the disk for a very long time; even if | |
36 | * the kernel knows the page will never be used again. This is because | |
37 | * the disk space costs very little and can be overwritten when | |
38 | * necessary. When such stale pages are in frontswap, however, they | |
39 | * are taking up valuable real estate. "Frontswap selfshrinking" works | |
40 | * to resolve this: When frontswap activity is otherwise stable | |
41 | * and the guest kernel is not under memory pressure, the "frontswap | |
42 | * selfshrinking" accounts for this by providing pressure to remove some | |
43 | * pages from frontswap and return them to kernel memory. | |
44 | * | |
45 | * For both "selfballooning" and "frontswap-selfshrinking", a worker | |
46 | * thread is used and sysfs tunables are provided to adjust the frequency | |
47 | * and rate of adjustments to achieve the goal, as well as to disable one | |
48 | * or both functions independently. | |
49 | * | |
50 | * While some argue that this functionality can and should be implemented | |
51 | * in userspace, it has been observed that bad things happen (e.g. OOMs). | |
52 | * | |
53 | * System configuration note: Selfballooning should not be enabled on | |
54 | * systems without a sufficiently large swap device configured; for best | |
55 | * results, it is recommended that total swap be increased by the size | |
37d46e15 KRW |
56 | * of the guest memory. Note that selfballooning should be disabled by default |
57 | * if frontswap is not configured. Similarly selfballooning should be enabled | |
58 | * by default if frontswap is configured and can be disabled with the | |
59 | * "tmem.selfballooning=0" kernel boot option. Finally, when frontswap is | |
60 | * configured, frontswap-selfshrinking can be disabled with the | |
61 | * "tmem.selfshrink=0" kernel boot option. | |
a50777c7 DM |
62 | * |
63 | * Selfballooning is disallowed in domain0 and force-disabled. | |
64 | * | |
65 | */ | |
66 | ||
283c0972 JP |
67 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
68 | ||
a50777c7 | 69 | #include <linux/kernel.h> |
38a1ed4f DM |
70 | #include <linux/bootmem.h> |
71 | #include <linux/swap.h> | |
a50777c7 DM |
72 | #include <linux/mm.h> |
73 | #include <linux/mman.h> | |
0642d2ed | 74 | #include <linux/workqueue.h> |
cb0c05c5 | 75 | #include <linux/device.h> |
a50777c7 | 76 | #include <xen/balloon.h> |
a50777c7 | 77 | #include <xen/tmem.h> |
0642d2ed | 78 | #include <xen/xen.h> |
a50777c7 DM |
79 | |
80 | /* Enable/disable with sysfs. */ | |
81 | static int xen_selfballooning_enabled __read_mostly; | |
82 | ||
83 | /* | |
84 | * Controls rate at which memory target (this iteration) approaches | |
85 | * ultimate goal when memory need is increasing (up-hysteresis) or | |
86 | * decreasing (down-hysteresis). Higher values of hysteresis cause | |
87 | * slower increases/decreases. The default values for the various | |
88 | * parameters were deemed reasonable by experimentation, may be | |
89 | * workload-dependent, and can all be adjusted via sysfs. | |
90 | */ | |
91 | static unsigned int selfballoon_downhysteresis __read_mostly = 8; | |
92 | static unsigned int selfballoon_uphysteresis __read_mostly = 1; | |
93 | ||
94 | /* In HZ, controls frequency of worker invocation. */ | |
95 | static unsigned int selfballoon_interval __read_mostly = 5; | |
96 | ||
38a1ed4f DM |
97 | /* |
98 | * Minimum usable RAM in MB for selfballooning target for balloon. | |
99 | * If non-zero, it is added to totalreserve_pages and self-ballooning | |
100 | * will not balloon below the sum. If zero, a piecewise linear function | |
101 | * is calculated as a minimum and added to totalreserve_pages. Note that | |
102 | * setting this value indiscriminately may cause OOMs and crashes. | |
103 | */ | |
104 | static unsigned int selfballoon_min_usable_mb; | |
105 | ||
d79d5959 JS |
106 | /* |
107 | * Amount of RAM in MB to add to the target number of pages. | |
108 | * Can be used to reserve some more room for caches and the like. | |
109 | */ | |
110 | static unsigned int selfballoon_reserved_mb; | |
111 | ||
a50777c7 DM |
112 | static void selfballoon_process(struct work_struct *work); |
113 | static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); | |
114 | ||
115 | #ifdef CONFIG_FRONTSWAP | |
116 | #include <linux/frontswap.h> | |
117 | ||
118 | /* Enable/disable with sysfs. */ | |
119 | static bool frontswap_selfshrinking __read_mostly; | |
120 | ||
a50777c7 DM |
121 | /* |
122 | * The default values for the following parameters were deemed reasonable | |
123 | * by experimentation, may be workload-dependent, and can all be | |
124 | * adjusted via sysfs. | |
125 | */ | |
126 | ||
127 | /* Control rate for frontswap shrinking. Higher hysteresis is slower. */ | |
128 | static unsigned int frontswap_hysteresis __read_mostly = 20; | |
129 | ||
130 | /* | |
131 | * Number of selfballoon worker invocations to wait before observing that | |
132 | * frontswap selfshrinking should commence. Note that selfshrinking does | |
133 | * not use a separate worker thread. | |
134 | */ | |
135 | static unsigned int frontswap_inertia __read_mostly = 3; | |
136 | ||
137 | /* Countdown to next invocation of frontswap_shrink() */ | |
138 | static unsigned long frontswap_inertia_counter; | |
139 | ||
140 | /* | |
141 | * Invoked by the selfballoon worker thread, uses current number of pages | |
142 | * in frontswap (frontswap_curr_pages()), previous status, and control | |
143 | * values (hysteresis and inertia) to determine if frontswap should be | |
144 | * shrunk and what the new frontswap size should be. Note that | |
145 | * frontswap_shrink is essentially a partial swapoff that immediately | |
146 | * transfers pages from the "swap device" (frontswap) back into kernel | |
147 | * RAM; despite the name, frontswap "shrinking" is very different from | |
148 | * the "shrinker" interface used by the kernel MM subsystem to reclaim | |
149 | * memory. | |
150 | */ | |
151 | static void frontswap_selfshrink(void) | |
152 | { | |
153 | static unsigned long cur_frontswap_pages; | |
154 | static unsigned long last_frontswap_pages; | |
155 | static unsigned long tgt_frontswap_pages; | |
156 | ||
157 | last_frontswap_pages = cur_frontswap_pages; | |
158 | cur_frontswap_pages = frontswap_curr_pages(); | |
159 | if (!cur_frontswap_pages || | |
160 | (cur_frontswap_pages > last_frontswap_pages)) { | |
161 | frontswap_inertia_counter = frontswap_inertia; | |
162 | return; | |
163 | } | |
164 | if (frontswap_inertia_counter && --frontswap_inertia_counter) | |
165 | return; | |
166 | if (cur_frontswap_pages <= frontswap_hysteresis) | |
167 | tgt_frontswap_pages = 0; | |
168 | else | |
169 | tgt_frontswap_pages = cur_frontswap_pages - | |
170 | (cur_frontswap_pages / frontswap_hysteresis); | |
171 | frontswap_shrink(tgt_frontswap_pages); | |
d4c7abdf | 172 | frontswap_inertia_counter = frontswap_inertia; |
a50777c7 DM |
173 | } |
174 | ||
a50777c7 DM |
175 | #endif /* CONFIG_FRONTSWAP */ |
176 | ||
/*
 * Convert between megabytes and pages.
 *
 * MB2PAGES() widens its argument to unsigned long before shifting: the
 * megabyte tunables are unsigned int, and shifting in that type would
 * wrap for large values instead of producing the real page count.
 */
#define MB2PAGES(mb)	((unsigned long)(mb) << (20 - PAGE_SHIFT))
#define PAGES2MB(pages)	((pages) >> (20 - PAGE_SHIFT))
38a1ed4f | 179 | |
a50777c7 DM |
180 | /* |
181 | * Use current balloon size, the goal (vm_committed_as), and hysteresis | |
182 | * parameters to set a new target balloon size | |
183 | */ | |
184 | static void selfballoon_process(struct work_struct *work) | |
185 | { | |
38a1ed4f DM |
186 | unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; |
187 | unsigned long useful_pages; | |
a50777c7 DM |
188 | bool reset_timer = false; |
189 | ||
190 | if (xen_selfballooning_enabled) { | |
38a1ed4f | 191 | cur_pages = totalram_pages; |
a50777c7 | 192 | tgt_pages = cur_pages; /* default is no change */ |
997071bc | 193 | goal_pages = vm_memory_committed() + |
d79d5959 JS |
194 | totalreserve_pages + |
195 | MB2PAGES(selfballoon_reserved_mb); | |
a50777c7 DM |
196 | #ifdef CONFIG_FRONTSWAP |
197 | /* allow space for frontswap pages to be repatriated */ | |
8ea1d2a1 | 198 | if (frontswap_selfshrinking) |
a50777c7 DM |
199 | goal_pages += frontswap_curr_pages(); |
200 | #endif | |
201 | if (cur_pages > goal_pages) | |
202 | tgt_pages = cur_pages - | |
203 | ((cur_pages - goal_pages) / | |
204 | selfballoon_downhysteresis); | |
205 | else if (cur_pages < goal_pages) | |
206 | tgt_pages = cur_pages + | |
207 | ((goal_pages - cur_pages) / | |
208 | selfballoon_uphysteresis); | |
209 | /* else if cur_pages == goal_pages, no change */ | |
38a1ed4f DM |
210 | useful_pages = max_pfn - totalreserve_pages; |
211 | if (selfballoon_min_usable_mb != 0) | |
212 | floor_pages = totalreserve_pages + | |
213 | MB2PAGES(selfballoon_min_usable_mb); | |
214 | /* piecewise linear function ending in ~3% slope */ | |
215 | else if (useful_pages < MB2PAGES(16)) | |
216 | floor_pages = max_pfn; /* not worth ballooning */ | |
217 | else if (useful_pages < MB2PAGES(64)) | |
218 | floor_pages = totalreserve_pages + MB2PAGES(16) + | |
219 | ((useful_pages - MB2PAGES(16)) >> 1); | |
220 | else if (useful_pages < MB2PAGES(512)) | |
221 | floor_pages = totalreserve_pages + MB2PAGES(40) + | |
222 | ((useful_pages - MB2PAGES(40)) >> 3); | |
223 | else /* useful_pages >= MB2PAGES(512) */ | |
224 | floor_pages = totalreserve_pages + MB2PAGES(99) + | |
225 | ((useful_pages - MB2PAGES(99)) >> 5); | |
226 | if (tgt_pages < floor_pages) | |
227 | tgt_pages = floor_pages; | |
228 | balloon_set_new_target(tgt_pages + | |
229 | balloon_stats.current_pages - totalram_pages); | |
a50777c7 DM |
230 | reset_timer = true; |
231 | } | |
232 | #ifdef CONFIG_FRONTSWAP | |
8ea1d2a1 | 233 | if (frontswap_selfshrinking) { |
a50777c7 DM |
234 | frontswap_selfshrink(); |
235 | reset_timer = true; | |
236 | } | |
237 | #endif | |
238 | if (reset_timer) | |
239 | schedule_delayed_work(&selfballoon_worker, | |
240 | selfballoon_interval * HZ); | |
241 | } | |
242 | ||
243 | #ifdef CONFIG_SYSFS | |
244 | ||
a50777c7 DM |
245 | #include <linux/capability.h> |
246 | ||
/*
 * Generate a sysfs "show" callback called show_<name> that formats the
 * supplied arguments into @buf with sprintf() and returns its result.
 */
#define SELFBALLOON_SHOW(name, format, ...)				\
	static ssize_t show_##name(struct device *dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		return sprintf(buf, format, ##__VA_ARGS__);		\
	}
254 | ||
255 | SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); | |
256 | ||
07068021 KS |
257 | static ssize_t store_selfballooning(struct device *dev, |
258 | struct device_attribute *attr, | |
a50777c7 DM |
259 | const char *buf, |
260 | size_t count) | |
261 | { | |
262 | bool was_enabled = xen_selfballooning_enabled; | |
263 | unsigned long tmp; | |
264 | int err; | |
265 | ||
266 | if (!capable(CAP_SYS_ADMIN)) | |
267 | return -EPERM; | |
268 | ||
d3dbd93d JH |
269 | err = kstrtoul(buf, 10, &tmp); |
270 | if (err) | |
271 | return err; | |
272 | if ((tmp != 0) && (tmp != 1)) | |
a50777c7 DM |
273 | return -EINVAL; |
274 | ||
275 | xen_selfballooning_enabled = !!tmp; | |
276 | if (!was_enabled && xen_selfballooning_enabled) | |
277 | schedule_delayed_work(&selfballoon_worker, | |
278 | selfballoon_interval * HZ); | |
279 | ||
280 | return count; | |
281 | } | |
282 | ||
07068021 | 283 | static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR, |
a50777c7 DM |
284 | show_selfballooning, store_selfballooning); |
285 | ||
286 | SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); | |
287 | ||
07068021 KS |
288 | static ssize_t store_selfballoon_interval(struct device *dev, |
289 | struct device_attribute *attr, | |
a50777c7 DM |
290 | const char *buf, |
291 | size_t count) | |
292 | { | |
293 | unsigned long val; | |
294 | int err; | |
295 | ||
296 | if (!capable(CAP_SYS_ADMIN)) | |
297 | return -EPERM; | |
d3dbd93d JH |
298 | err = kstrtoul(buf, 10, &val); |
299 | if (err) | |
300 | return err; | |
301 | if (val == 0) | |
a50777c7 DM |
302 | return -EINVAL; |
303 | selfballoon_interval = val; | |
304 | return count; | |
305 | } | |
306 | ||
07068021 | 307 | static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, |
a50777c7 DM |
308 | show_selfballoon_interval, store_selfballoon_interval); |
309 | ||
310 | SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); | |
311 | ||
07068021 KS |
312 | static ssize_t store_selfballoon_downhys(struct device *dev, |
313 | struct device_attribute *attr, | |
a50777c7 DM |
314 | const char *buf, |
315 | size_t count) | |
316 | { | |
317 | unsigned long val; | |
318 | int err; | |
319 | ||
320 | if (!capable(CAP_SYS_ADMIN)) | |
321 | return -EPERM; | |
d3dbd93d JH |
322 | err = kstrtoul(buf, 10, &val); |
323 | if (err) | |
324 | return err; | |
325 | if (val == 0) | |
a50777c7 DM |
326 | return -EINVAL; |
327 | selfballoon_downhysteresis = val; | |
328 | return count; | |
329 | } | |
330 | ||
07068021 | 331 | static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, |
a50777c7 DM |
332 | show_selfballoon_downhys, store_selfballoon_downhys); |
333 | ||
334 | ||
335 | SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); | |
336 | ||
07068021 KS |
337 | static ssize_t store_selfballoon_uphys(struct device *dev, |
338 | struct device_attribute *attr, | |
a50777c7 DM |
339 | const char *buf, |
340 | size_t count) | |
341 | { | |
342 | unsigned long val; | |
343 | int err; | |
344 | ||
345 | if (!capable(CAP_SYS_ADMIN)) | |
346 | return -EPERM; | |
d3dbd93d JH |
347 | err = kstrtoul(buf, 10, &val); |
348 | if (err) | |
349 | return err; | |
350 | if (val == 0) | |
a50777c7 DM |
351 | return -EINVAL; |
352 | selfballoon_uphysteresis = val; | |
353 | return count; | |
354 | } | |
355 | ||
07068021 | 356 | static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, |
a50777c7 DM |
357 | show_selfballoon_uphys, store_selfballoon_uphys); |
358 | ||
38a1ed4f DM |
359 | SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", |
360 | selfballoon_min_usable_mb); | |
361 | ||
07068021 KS |
362 | static ssize_t store_selfballoon_min_usable_mb(struct device *dev, |
363 | struct device_attribute *attr, | |
38a1ed4f DM |
364 | const char *buf, |
365 | size_t count) | |
366 | { | |
367 | unsigned long val; | |
368 | int err; | |
369 | ||
370 | if (!capable(CAP_SYS_ADMIN)) | |
371 | return -EPERM; | |
d3dbd93d JH |
372 | err = kstrtoul(buf, 10, &val); |
373 | if (err) | |
374 | return err; | |
375 | if (val == 0) | |
38a1ed4f DM |
376 | return -EINVAL; |
377 | selfballoon_min_usable_mb = val; | |
378 | return count; | |
379 | } | |
380 | ||
07068021 | 381 | static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, |
38a1ed4f DM |
382 | show_selfballoon_min_usable_mb, |
383 | store_selfballoon_min_usable_mb); | |
384 | ||
d79d5959 JS |
385 | SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n", |
386 | selfballoon_reserved_mb); | |
387 | ||
388 | static ssize_t store_selfballoon_reserved_mb(struct device *dev, | |
389 | struct device_attribute *attr, | |
390 | const char *buf, | |
391 | size_t count) | |
392 | { | |
393 | unsigned long val; | |
394 | int err; | |
395 | ||
396 | if (!capable(CAP_SYS_ADMIN)) | |
397 | return -EPERM; | |
d3dbd93d JH |
398 | err = kstrtoul(buf, 10, &val); |
399 | if (err) | |
400 | return err; | |
401 | if (val == 0) | |
d79d5959 JS |
402 | return -EINVAL; |
403 | selfballoon_reserved_mb = val; | |
404 | return count; | |
405 | } | |
406 | ||
407 | static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR, | |
408 | show_selfballoon_reserved_mb, | |
409 | store_selfballoon_reserved_mb); | |
410 | ||
38a1ed4f | 411 | |
a50777c7 DM |
412 | #ifdef CONFIG_FRONTSWAP |
413 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); | |
414 | ||
07068021 KS |
415 | static ssize_t store_frontswap_selfshrinking(struct device *dev, |
416 | struct device_attribute *attr, | |
a50777c7 DM |
417 | const char *buf, |
418 | size_t count) | |
419 | { | |
420 | bool was_enabled = frontswap_selfshrinking; | |
421 | unsigned long tmp; | |
422 | int err; | |
423 | ||
424 | if (!capable(CAP_SYS_ADMIN)) | |
425 | return -EPERM; | |
d3dbd93d JH |
426 | err = kstrtoul(buf, 10, &tmp); |
427 | if (err) | |
428 | return err; | |
429 | if ((tmp != 0) && (tmp != 1)) | |
a50777c7 DM |
430 | return -EINVAL; |
431 | frontswap_selfshrinking = !!tmp; | |
432 | if (!was_enabled && !xen_selfballooning_enabled && | |
433 | frontswap_selfshrinking) | |
434 | schedule_delayed_work(&selfballoon_worker, | |
435 | selfballoon_interval * HZ); | |
436 | ||
437 | return count; | |
438 | } | |
439 | ||
07068021 | 440 | static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, |
a50777c7 DM |
441 | show_frontswap_selfshrinking, store_frontswap_selfshrinking); |
442 | ||
443 | SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); | |
444 | ||
07068021 KS |
445 | static ssize_t store_frontswap_inertia(struct device *dev, |
446 | struct device_attribute *attr, | |
a50777c7 DM |
447 | const char *buf, |
448 | size_t count) | |
449 | { | |
450 | unsigned long val; | |
451 | int err; | |
452 | ||
453 | if (!capable(CAP_SYS_ADMIN)) | |
454 | return -EPERM; | |
d3dbd93d JH |
455 | err = kstrtoul(buf, 10, &val); |
456 | if (err) | |
457 | return err; | |
458 | if (val == 0) | |
a50777c7 DM |
459 | return -EINVAL; |
460 | frontswap_inertia = val; | |
461 | frontswap_inertia_counter = val; | |
462 | return count; | |
463 | } | |
464 | ||
07068021 | 465 | static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, |
a50777c7 DM |
466 | show_frontswap_inertia, store_frontswap_inertia); |
467 | ||
468 | SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); | |
469 | ||
07068021 KS |
470 | static ssize_t store_frontswap_hysteresis(struct device *dev, |
471 | struct device_attribute *attr, | |
a50777c7 DM |
472 | const char *buf, |
473 | size_t count) | |
474 | { | |
475 | unsigned long val; | |
476 | int err; | |
477 | ||
478 | if (!capable(CAP_SYS_ADMIN)) | |
479 | return -EPERM; | |
d3dbd93d JH |
480 | err = kstrtoul(buf, 10, &val); |
481 | if (err) | |
482 | return err; | |
483 | if (val == 0) | |
a50777c7 DM |
484 | return -EINVAL; |
485 | frontswap_hysteresis = val; | |
486 | return count; | |
487 | } | |
488 | ||
07068021 | 489 | static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, |
a50777c7 DM |
490 | show_frontswap_hysteresis, store_frontswap_hysteresis); |
491 | ||
492 | #endif /* CONFIG_FRONTSWAP */ | |
493 | ||
494 | static struct attribute *selfballoon_attrs[] = { | |
07068021 KS |
495 | &dev_attr_selfballooning.attr, |
496 | &dev_attr_selfballoon_interval.attr, | |
497 | &dev_attr_selfballoon_downhysteresis.attr, | |
498 | &dev_attr_selfballoon_uphysteresis.attr, | |
499 | &dev_attr_selfballoon_min_usable_mb.attr, | |
d79d5959 | 500 | &dev_attr_selfballoon_reserved_mb.attr, |
a50777c7 | 501 | #ifdef CONFIG_FRONTSWAP |
07068021 KS |
502 | &dev_attr_frontswap_selfshrinking.attr, |
503 | &dev_attr_frontswap_hysteresis.attr, | |
504 | &dev_attr_frontswap_inertia.attr, | |
a50777c7 DM |
505 | #endif |
506 | NULL | |
507 | }; | |
508 | ||
ead1d014 | 509 | static const struct attribute_group selfballoon_group = { |
a50777c7 DM |
510 | .name = "selfballoon", |
511 | .attrs = selfballoon_attrs | |
512 | }; | |
513 | #endif | |
514 | ||
/*
 * Create the "selfballoon" sysfs attribute group under @dev.
 *
 * Returns the result of sysfs_create_group() when CONFIG_SYSFS is set,
 * and -1 when it is not.
 */
int register_xen_selfballooning(struct device *dev)
{
#ifdef CONFIG_SYSFS
	return sysfs_create_group(&dev->kobj, &selfballoon_group);
#else
	return -1;
#endif
}
EXPORT_SYMBOL(register_xen_selfballooning);
525 | ||
10a7a077 | 526 | int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) |
a50777c7 DM |
527 | { |
528 | bool enable = false; | |
bc1b0df5 | 529 | unsigned long reserve_pages; |
a50777c7 DM |
530 | |
531 | if (!xen_domain()) | |
532 | return -ENODEV; | |
533 | ||
534 | if (xen_initial_domain()) { | |
283c0972 | 535 | pr_info("Xen selfballooning driver disabled for domain0\n"); |
a50777c7 DM |
536 | return -ENODEV; |
537 | } | |
538 | ||
539 | xen_selfballooning_enabled = tmem_enabled && use_selfballooning; | |
540 | if (xen_selfballooning_enabled) { | |
283c0972 | 541 | pr_info("Initializing Xen selfballooning driver\n"); |
a50777c7 DM |
542 | enable = true; |
543 | } | |
544 | #ifdef CONFIG_FRONTSWAP | |
545 | frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; | |
546 | if (frontswap_selfshrinking) { | |
283c0972 | 547 | pr_info("Initializing frontswap selfshrinking driver\n"); |
a50777c7 DM |
548 | enable = true; |
549 | } | |
550 | #endif | |
551 | if (!enable) | |
552 | return -ENODEV; | |
553 | ||
bc1b0df5 BL |
554 | /* |
555 | * Give selfballoon_reserved_mb a default value(10% of total ram pages) | |
556 | * to make selfballoon not so aggressive. | |
557 | * | |
558 | * There are mainly two reasons: | |
559 | * 1) The original goal_page didn't consider some pages used by kernel | |
560 | * space, like slab pages and memory used by device drivers. | |
561 | * | |
562 | * 2) The balloon driver may not give back memory to guest OS fast | |
563 | * enough when the workload suddenly aquries a lot of physical memory. | |
564 | * | |
565 | * In both cases, the guest OS will suffer from memory pressure and | |
566 | * OOM killer may be triggered. | |
567 | * By reserving extra 10% of total ram pages, we can keep the system | |
568 | * much more reliably and response faster in some cases. | |
569 | */ | |
570 | if (!selfballoon_reserved_mb) { | |
571 | reserve_pages = totalram_pages / 10; | |
572 | selfballoon_reserved_mb = PAGES2MB(reserve_pages); | |
573 | } | |
a50777c7 DM |
574 | schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); |
575 | ||
576 | return 0; | |
577 | } | |
10a7a077 | 578 | EXPORT_SYMBOL(xen_selfballoon_init); |