diff options
Diffstat (limited to 'drivers/xen/xen-selfballoon.c')
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 579 |
1 files changed, 0 insertions, 579 deletions
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c deleted file mode 100644 index 246f6122c9ee..000000000000 --- a/drivers/xen/xen-selfballoon.c +++ /dev/null | |||
@@ -1,579 +0,0 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /****************************************************************************** | ||
3 | * Xen selfballoon driver (and optional frontswap self-shrinking driver) | ||
4 | * | ||
5 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. | ||
6 | * | ||
7 | * This code complements the cleancache and frontswap patchsets to optimize | ||
8 | * support for Xen Transcendent Memory ("tmem"). The policy it implements | ||
9 | * is rudimentary and will likely improve over time, but it does work well | ||
10 | * enough today. | ||
11 | * | ||
12 | * Two functionalities are implemented here which both use "control theory" | ||
13 | * (feedback) to optimize memory utilization. In a virtualized environment | ||
14 | * such as Xen, RAM is often a scarce resource and we would like to ensure | ||
15 | * that each of a possibly large number of virtual machines is using RAM | ||
16 | * efficiently, i.e. using as little as possible when under light load | ||
17 | * and obtaining as much as possible when memory demands are high. | ||
18 | * Since RAM needs vary highly dynamically and sometimes dramatically, | ||
19 | * "hysteresis" is used, that is, memory target is determined not just | ||
20 | * on current data but also on past data stored in the system. | ||
21 | * | ||
22 | * "Selfballooning" creates memory pressure by managing the Xen balloon | ||
23 | * driver to decrease and increase available kernel memory, driven | ||
24 | * largely by the target value of "Committed_AS" (see /proc/meminfo). | ||
25 | * Since Committed_AS does not account for clean mapped pages (i.e. pages | ||
26 | * in RAM that are identical to pages on disk), selfballooning has the | ||
27 | * effect of pushing less frequently used clean pagecache pages out of | ||
28 | * kernel RAM and, presumably using cleancache, into Xen tmem where | ||
29 | * Xen can more efficiently optimize RAM utilization for such pages. | ||
30 | * | ||
31 | * When kernel memory demand unexpectedly increases faster than Xen, via | ||
32 | * the selfballoon driver, is able to (or chooses to) provide usable RAM, | ||
33 | * the kernel may invoke swapping. In most cases, frontswap is able | ||
34 | * to absorb this swapping into Xen tmem. However, due to the fact | ||
35 | * that the kernel swap subsystem assumes swapping occurs to a disk, | ||
36 | * swapped pages may sit on the disk for a very long time; even if | ||
37 | * the kernel knows the page will never be used again. This is because | ||
38 | * the disk space costs very little and can be overwritten when | ||
39 | * necessary. When such stale pages are in frontswap, however, they | ||
40 | * are taking up valuable real estate. "Frontswap selfshrinking" works | ||
41 | * to resolve this: When frontswap activity is otherwise stable | ||
42 | * and the guest kernel is not under memory pressure, the "frontswap | ||
43 | * selfshrinking" accounts for this by providing pressure to remove some | ||
44 | * pages from frontswap and return them to kernel memory. | ||
45 | * | ||
46 | * For both "selfballooning" and "frontswap-selfshrinking", a worker | ||
47 | * thread is used and sysfs tunables are provided to adjust the frequency | ||
48 | * and rate of adjustments to achieve the goal, as well as to disable one | ||
49 | * or both functions independently. | ||
50 | * | ||
51 | * While some argue that this functionality can and should be implemented | ||
52 | * in userspace, it has been observed that bad things happen (e.g. OOMs). | ||
53 | * | ||
54 | * System configuration note: Selfballooning should not be enabled on | ||
55 | * systems without a sufficiently large swap device configured; for best | ||
56 | * results, it is recommended that total swap be increased by the size | ||
57 | * of the guest memory. Note, that selfballooning should be disabled by default | ||
58 | * if frontswap is not configured. Similarly selfballooning should be enabled | ||
59 | * by default if frontswap is configured and can be disabled with the | ||
60 | * "tmem.selfballooning=0" kernel boot option. Finally, when frontswap is | ||
61 | * configured, frontswap-selfshrinking can be disabled with the | ||
62 | * "tmem.selfshrink=0" kernel boot option. | ||
63 | * | ||
64 | * Selfballooning is disallowed in domain0 and force-disabled. | ||
65 | * | ||
66 | */ | ||
67 | |||
68 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
69 | |||
70 | #include <linux/kernel.h> | ||
71 | #include <linux/memblock.h> | ||
72 | #include <linux/swap.h> | ||
73 | #include <linux/mm.h> | ||
74 | #include <linux/mman.h> | ||
75 | #include <linux/workqueue.h> | ||
76 | #include <linux/device.h> | ||
77 | #include <xen/balloon.h> | ||
78 | #include <xen/tmem.h> | ||
79 | #include <xen/xen.h> | ||
80 | |||
81 | /* Enable/disable with sysfs. */ | ||
82 | static int xen_selfballooning_enabled __read_mostly; | ||
83 | |||
84 | /* | ||
85 | * Controls rate at which memory target (this iteration) approaches | ||
86 | * ultimate goal when memory need is increasing (up-hysteresis) or | ||
87 | * decreasing (down-hysteresis). Higher values of hysteresis cause | ||
88 | * slower increases/decreases. The default values for the various | ||
89 | * parameters were deemed reasonable by experimentation, may be | ||
90 | * workload-dependent, and can all be adjusted via sysfs. | ||
91 | */ | ||
92 | static unsigned int selfballoon_downhysteresis __read_mostly = 8; | ||
93 | static unsigned int selfballoon_uphysteresis __read_mostly = 1; | ||
94 | |||
95 | /* In HZ, controls frequency of worker invocation. */ | ||
96 | static unsigned int selfballoon_interval __read_mostly = 5; | ||
97 | |||
98 | /* | ||
99 | * Minimum usable RAM in MB for selfballooning target for balloon. | ||
100 | * If non-zero, it is added to totalreserve_pages and self-ballooning | ||
101 | * will not balloon below the sum. If zero, a piecewise linear function | ||
102 | * is calculated as a minimum and added to totalreserve_pages. Note that | ||
103 | * setting this value indiscriminately may cause OOMs and crashes. | ||
104 | */ | ||
105 | static unsigned int selfballoon_min_usable_mb; | ||
106 | |||
107 | /* | ||
108 | * Amount of RAM in MB to add to the target number of pages. | ||
109 | * Can be used to reserve some more room for caches and the like. | ||
110 | */ | ||
111 | static unsigned int selfballoon_reserved_mb; | ||
112 | |||
113 | static void selfballoon_process(struct work_struct *work); | ||
114 | static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); | ||
115 | |||
116 | #ifdef CONFIG_FRONTSWAP | ||
117 | #include <linux/frontswap.h> | ||
118 | |||
119 | /* Enable/disable with sysfs. */ | ||
120 | static bool frontswap_selfshrinking __read_mostly; | ||
121 | |||
122 | /* | ||
123 | * The default values for the following parameters were deemed reasonable | ||
124 | * by experimentation, may be workload-dependent, and can all be | ||
125 | * adjusted via sysfs. | ||
126 | */ | ||
127 | |||
128 | /* Control rate for frontswap shrinking. Higher hysteresis is slower. */ | ||
129 | static unsigned int frontswap_hysteresis __read_mostly = 20; | ||
130 | |||
131 | /* | ||
132 | * Number of selfballoon worker invocations to wait before observing that | ||
133 | * frontswap selfshrinking should commence. Note that selfshrinking does | ||
134 | * not use a separate worker thread. | ||
135 | */ | ||
136 | static unsigned int frontswap_inertia __read_mostly = 3; | ||
137 | |||
138 | /* Countdown to next invocation of frontswap_shrink() */ | ||
139 | static unsigned long frontswap_inertia_counter; | ||
140 | |||
141 | /* | ||
142 | * Invoked by the selfballoon worker thread, uses current number of pages | ||
143 | * in frontswap (frontswap_curr_pages()), previous status, and control | ||
144 | * values (hysteresis and inertia) to determine if frontswap should be | ||
145 | * shrunk and what the new frontswap size should be. Note that | ||
146 | * frontswap_shrink is essentially a partial swapoff that immediately | ||
147 | * transfers pages from the "swap device" (frontswap) back into kernel | ||
148 | * RAM; despite the name, frontswap "shrinking" is very different from | ||
149 | * the "shrinker" interface used by the kernel MM subsystem to reclaim | ||
150 | * memory. | ||
151 | */ | ||
152 | static void frontswap_selfshrink(void) | ||
153 | { | ||
154 | static unsigned long cur_frontswap_pages; | ||
155 | unsigned long last_frontswap_pages; | ||
156 | unsigned long tgt_frontswap_pages; | ||
157 | |||
158 | last_frontswap_pages = cur_frontswap_pages; | ||
159 | cur_frontswap_pages = frontswap_curr_pages(); | ||
160 | if (!cur_frontswap_pages || | ||
161 | (cur_frontswap_pages > last_frontswap_pages)) { | ||
162 | frontswap_inertia_counter = frontswap_inertia; | ||
163 | return; | ||
164 | } | ||
165 | if (frontswap_inertia_counter && --frontswap_inertia_counter) | ||
166 | return; | ||
167 | if (cur_frontswap_pages <= frontswap_hysteresis) | ||
168 | tgt_frontswap_pages = 0; | ||
169 | else | ||
170 | tgt_frontswap_pages = cur_frontswap_pages - | ||
171 | (cur_frontswap_pages / frontswap_hysteresis); | ||
172 | frontswap_shrink(tgt_frontswap_pages); | ||
173 | frontswap_inertia_counter = frontswap_inertia; | ||
174 | } | ||
175 | |||
176 | #endif /* CONFIG_FRONTSWAP */ | ||
177 | |||
178 | #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) | ||
179 | #define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT)) | ||
180 | |||
181 | /* | ||
182 | * Use current balloon size, the goal (vm_committed_as), and hysteresis | ||
183 | * parameters to set a new target balloon size | ||
184 | */ | ||
185 | static void selfballoon_process(struct work_struct *work) | ||
186 | { | ||
187 | unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; | ||
188 | unsigned long useful_pages; | ||
189 | bool reset_timer = false; | ||
190 | |||
191 | if (xen_selfballooning_enabled) { | ||
192 | cur_pages = totalram_pages(); | ||
193 | tgt_pages = cur_pages; /* default is no change */ | ||
194 | goal_pages = vm_memory_committed() + | ||
195 | totalreserve_pages + | ||
196 | MB2PAGES(selfballoon_reserved_mb); | ||
197 | #ifdef CONFIG_FRONTSWAP | ||
198 | /* allow space for frontswap pages to be repatriated */ | ||
199 | if (frontswap_selfshrinking) | ||
200 | goal_pages += frontswap_curr_pages(); | ||
201 | #endif | ||
202 | if (cur_pages > goal_pages) | ||
203 | tgt_pages = cur_pages - | ||
204 | ((cur_pages - goal_pages) / | ||
205 | selfballoon_downhysteresis); | ||
206 | else if (cur_pages < goal_pages) | ||
207 | tgt_pages = cur_pages + | ||
208 | ((goal_pages - cur_pages) / | ||
209 | selfballoon_uphysteresis); | ||
210 | /* else if cur_pages == goal_pages, no change */ | ||
211 | useful_pages = max_pfn - totalreserve_pages; | ||
212 | if (selfballoon_min_usable_mb != 0) | ||
213 | floor_pages = totalreserve_pages + | ||
214 | MB2PAGES(selfballoon_min_usable_mb); | ||
215 | /* piecewise linear function ending in ~3% slope */ | ||
216 | else if (useful_pages < MB2PAGES(16)) | ||
217 | floor_pages = max_pfn; /* not worth ballooning */ | ||
218 | else if (useful_pages < MB2PAGES(64)) | ||
219 | floor_pages = totalreserve_pages + MB2PAGES(16) + | ||
220 | ((useful_pages - MB2PAGES(16)) >> 1); | ||
221 | else if (useful_pages < MB2PAGES(512)) | ||
222 | floor_pages = totalreserve_pages + MB2PAGES(40) + | ||
223 | ((useful_pages - MB2PAGES(40)) >> 3); | ||
224 | else /* useful_pages >= MB2PAGES(512) */ | ||
225 | floor_pages = totalreserve_pages + MB2PAGES(99) + | ||
226 | ((useful_pages - MB2PAGES(99)) >> 5); | ||
227 | if (tgt_pages < floor_pages) | ||
228 | tgt_pages = floor_pages; | ||
229 | balloon_set_new_target(tgt_pages + | ||
230 | balloon_stats.current_pages - totalram_pages()); | ||
231 | reset_timer = true; | ||
232 | } | ||
233 | #ifdef CONFIG_FRONTSWAP | ||
234 | if (frontswap_selfshrinking) { | ||
235 | frontswap_selfshrink(); | ||
236 | reset_timer = true; | ||
237 | } | ||
238 | #endif | ||
239 | if (reset_timer) | ||
240 | schedule_delayed_work(&selfballoon_worker, | ||
241 | selfballoon_interval * HZ); | ||
242 | } | ||
243 | |||
244 | #ifdef CONFIG_SYSFS | ||
245 | |||
246 | #include <linux/capability.h> | ||
247 | |||
248 | #define SELFBALLOON_SHOW(name, format, args...) \ | ||
249 | static ssize_t show_##name(struct device *dev, \ | ||
250 | struct device_attribute *attr, \ | ||
251 | char *buf) \ | ||
252 | { \ | ||
253 | return sprintf(buf, format, ##args); \ | ||
254 | } | ||
255 | |||
256 | SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); | ||
257 | |||
258 | static ssize_t store_selfballooning(struct device *dev, | ||
259 | struct device_attribute *attr, | ||
260 | const char *buf, | ||
261 | size_t count) | ||
262 | { | ||
263 | bool was_enabled = xen_selfballooning_enabled; | ||
264 | unsigned long tmp; | ||
265 | int err; | ||
266 | |||
267 | if (!capable(CAP_SYS_ADMIN)) | ||
268 | return -EPERM; | ||
269 | |||
270 | err = kstrtoul(buf, 10, &tmp); | ||
271 | if (err) | ||
272 | return err; | ||
273 | if ((tmp != 0) && (tmp != 1)) | ||
274 | return -EINVAL; | ||
275 | |||
276 | xen_selfballooning_enabled = !!tmp; | ||
277 | if (!was_enabled && xen_selfballooning_enabled) | ||
278 | schedule_delayed_work(&selfballoon_worker, | ||
279 | selfballoon_interval * HZ); | ||
280 | |||
281 | return count; | ||
282 | } | ||
283 | |||
284 | static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR, | ||
285 | show_selfballooning, store_selfballooning); | ||
286 | |||
287 | SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); | ||
288 | |||
289 | static ssize_t store_selfballoon_interval(struct device *dev, | ||
290 | struct device_attribute *attr, | ||
291 | const char *buf, | ||
292 | size_t count) | ||
293 | { | ||
294 | unsigned long val; | ||
295 | int err; | ||
296 | |||
297 | if (!capable(CAP_SYS_ADMIN)) | ||
298 | return -EPERM; | ||
299 | err = kstrtoul(buf, 10, &val); | ||
300 | if (err) | ||
301 | return err; | ||
302 | if (val == 0) | ||
303 | return -EINVAL; | ||
304 | selfballoon_interval = val; | ||
305 | return count; | ||
306 | } | ||
307 | |||
308 | static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, | ||
309 | show_selfballoon_interval, store_selfballoon_interval); | ||
310 | |||
311 | SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); | ||
312 | |||
313 | static ssize_t store_selfballoon_downhys(struct device *dev, | ||
314 | struct device_attribute *attr, | ||
315 | const char *buf, | ||
316 | size_t count) | ||
317 | { | ||
318 | unsigned long val; | ||
319 | int err; | ||
320 | |||
321 | if (!capable(CAP_SYS_ADMIN)) | ||
322 | return -EPERM; | ||
323 | err = kstrtoul(buf, 10, &val); | ||
324 | if (err) | ||
325 | return err; | ||
326 | if (val == 0) | ||
327 | return -EINVAL; | ||
328 | selfballoon_downhysteresis = val; | ||
329 | return count; | ||
330 | } | ||
331 | |||
332 | static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, | ||
333 | show_selfballoon_downhys, store_selfballoon_downhys); | ||
334 | |||
335 | |||
336 | SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); | ||
337 | |||
338 | static ssize_t store_selfballoon_uphys(struct device *dev, | ||
339 | struct device_attribute *attr, | ||
340 | const char *buf, | ||
341 | size_t count) | ||
342 | { | ||
343 | unsigned long val; | ||
344 | int err; | ||
345 | |||
346 | if (!capable(CAP_SYS_ADMIN)) | ||
347 | return -EPERM; | ||
348 | err = kstrtoul(buf, 10, &val); | ||
349 | if (err) | ||
350 | return err; | ||
351 | if (val == 0) | ||
352 | return -EINVAL; | ||
353 | selfballoon_uphysteresis = val; | ||
354 | return count; | ||
355 | } | ||
356 | |||
357 | static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, | ||
358 | show_selfballoon_uphys, store_selfballoon_uphys); | ||
359 | |||
360 | SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", | ||
361 | selfballoon_min_usable_mb); | ||
362 | |||
363 | static ssize_t store_selfballoon_min_usable_mb(struct device *dev, | ||
364 | struct device_attribute *attr, | ||
365 | const char *buf, | ||
366 | size_t count) | ||
367 | { | ||
368 | unsigned long val; | ||
369 | int err; | ||
370 | |||
371 | if (!capable(CAP_SYS_ADMIN)) | ||
372 | return -EPERM; | ||
373 | err = kstrtoul(buf, 10, &val); | ||
374 | if (err) | ||
375 | return err; | ||
376 | if (val == 0) | ||
377 | return -EINVAL; | ||
378 | selfballoon_min_usable_mb = val; | ||
379 | return count; | ||
380 | } | ||
381 | |||
382 | static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, | ||
383 | show_selfballoon_min_usable_mb, | ||
384 | store_selfballoon_min_usable_mb); | ||
385 | |||
386 | SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n", | ||
387 | selfballoon_reserved_mb); | ||
388 | |||
389 | static ssize_t store_selfballoon_reserved_mb(struct device *dev, | ||
390 | struct device_attribute *attr, | ||
391 | const char *buf, | ||
392 | size_t count) | ||
393 | { | ||
394 | unsigned long val; | ||
395 | int err; | ||
396 | |||
397 | if (!capable(CAP_SYS_ADMIN)) | ||
398 | return -EPERM; | ||
399 | err = kstrtoul(buf, 10, &val); | ||
400 | if (err) | ||
401 | return err; | ||
402 | if (val == 0) | ||
403 | return -EINVAL; | ||
404 | selfballoon_reserved_mb = val; | ||
405 | return count; | ||
406 | } | ||
407 | |||
408 | static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR, | ||
409 | show_selfballoon_reserved_mb, | ||
410 | store_selfballoon_reserved_mb); | ||
411 | |||
412 | |||
413 | #ifdef CONFIG_FRONTSWAP | ||
414 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); | ||
415 | |||
416 | static ssize_t store_frontswap_selfshrinking(struct device *dev, | ||
417 | struct device_attribute *attr, | ||
418 | const char *buf, | ||
419 | size_t count) | ||
420 | { | ||
421 | bool was_enabled = frontswap_selfshrinking; | ||
422 | unsigned long tmp; | ||
423 | int err; | ||
424 | |||
425 | if (!capable(CAP_SYS_ADMIN)) | ||
426 | return -EPERM; | ||
427 | err = kstrtoul(buf, 10, &tmp); | ||
428 | if (err) | ||
429 | return err; | ||
430 | if ((tmp != 0) && (tmp != 1)) | ||
431 | return -EINVAL; | ||
432 | frontswap_selfshrinking = !!tmp; | ||
433 | if (!was_enabled && !xen_selfballooning_enabled && | ||
434 | frontswap_selfshrinking) | ||
435 | schedule_delayed_work(&selfballoon_worker, | ||
436 | selfballoon_interval * HZ); | ||
437 | |||
438 | return count; | ||
439 | } | ||
440 | |||
441 | static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, | ||
442 | show_frontswap_selfshrinking, store_frontswap_selfshrinking); | ||
443 | |||
444 | SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); | ||
445 | |||
446 | static ssize_t store_frontswap_inertia(struct device *dev, | ||
447 | struct device_attribute *attr, | ||
448 | const char *buf, | ||
449 | size_t count) | ||
450 | { | ||
451 | unsigned long val; | ||
452 | int err; | ||
453 | |||
454 | if (!capable(CAP_SYS_ADMIN)) | ||
455 | return -EPERM; | ||
456 | err = kstrtoul(buf, 10, &val); | ||
457 | if (err) | ||
458 | return err; | ||
459 | if (val == 0) | ||
460 | return -EINVAL; | ||
461 | frontswap_inertia = val; | ||
462 | frontswap_inertia_counter = val; | ||
463 | return count; | ||
464 | } | ||
465 | |||
466 | static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, | ||
467 | show_frontswap_inertia, store_frontswap_inertia); | ||
468 | |||
469 | SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); | ||
470 | |||
471 | static ssize_t store_frontswap_hysteresis(struct device *dev, | ||
472 | struct device_attribute *attr, | ||
473 | const char *buf, | ||
474 | size_t count) | ||
475 | { | ||
476 | unsigned long val; | ||
477 | int err; | ||
478 | |||
479 | if (!capable(CAP_SYS_ADMIN)) | ||
480 | return -EPERM; | ||
481 | err = kstrtoul(buf, 10, &val); | ||
482 | if (err) | ||
483 | return err; | ||
484 | if (val == 0) | ||
485 | return -EINVAL; | ||
486 | frontswap_hysteresis = val; | ||
487 | return count; | ||
488 | } | ||
489 | |||
490 | static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, | ||
491 | show_frontswap_hysteresis, store_frontswap_hysteresis); | ||
492 | |||
493 | #endif /* CONFIG_FRONTSWAP */ | ||
494 | |||
495 | static struct attribute *selfballoon_attrs[] = { | ||
496 | &dev_attr_selfballooning.attr, | ||
497 | &dev_attr_selfballoon_interval.attr, | ||
498 | &dev_attr_selfballoon_downhysteresis.attr, | ||
499 | &dev_attr_selfballoon_uphysteresis.attr, | ||
500 | &dev_attr_selfballoon_min_usable_mb.attr, | ||
501 | &dev_attr_selfballoon_reserved_mb.attr, | ||
502 | #ifdef CONFIG_FRONTSWAP | ||
503 | &dev_attr_frontswap_selfshrinking.attr, | ||
504 | &dev_attr_frontswap_hysteresis.attr, | ||
505 | &dev_attr_frontswap_inertia.attr, | ||
506 | #endif | ||
507 | NULL | ||
508 | }; | ||
509 | |||
510 | static const struct attribute_group selfballoon_group = { | ||
511 | .name = "selfballoon", | ||
512 | .attrs = selfballoon_attrs | ||
513 | }; | ||
514 | #endif | ||
515 | |||
516 | int register_xen_selfballooning(struct device *dev) | ||
517 | { | ||
518 | int error = -1; | ||
519 | |||
520 | #ifdef CONFIG_SYSFS | ||
521 | error = sysfs_create_group(&dev->kobj, &selfballoon_group); | ||
522 | #endif | ||
523 | return error; | ||
524 | } | ||
525 | EXPORT_SYMBOL(register_xen_selfballooning); | ||
526 | |||
527 | int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) | ||
528 | { | ||
529 | bool enable = false; | ||
530 | unsigned long reserve_pages; | ||
531 | |||
532 | if (!xen_domain()) | ||
533 | return -ENODEV; | ||
534 | |||
535 | if (xen_initial_domain()) { | ||
536 | pr_info("Xen selfballooning driver disabled for domain0\n"); | ||
537 | return -ENODEV; | ||
538 | } | ||
539 | |||
540 | xen_selfballooning_enabled = tmem_enabled && use_selfballooning; | ||
541 | if (xen_selfballooning_enabled) { | ||
542 | pr_info("Initializing Xen selfballooning driver\n"); | ||
543 | enable = true; | ||
544 | } | ||
545 | #ifdef CONFIG_FRONTSWAP | ||
546 | frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; | ||
547 | if (frontswap_selfshrinking) { | ||
548 | pr_info("Initializing frontswap selfshrinking driver\n"); | ||
549 | enable = true; | ||
550 | } | ||
551 | #endif | ||
552 | if (!enable) | ||
553 | return -ENODEV; | ||
554 | |||
555 | /* | ||
556 | * Give selfballoon_reserved_mb a default value(10% of total ram pages) | ||
557 | * to make selfballoon not so aggressive. | ||
558 | * | ||
559 | * There are mainly two reasons: | ||
560 | * 1) The original goal_page didn't consider some pages used by kernel | ||
561 | * space, like slab pages and memory used by device drivers. | ||
562 | * | ||
563 | * 2) The balloon driver may not give back memory to guest OS fast | ||
564 | * enough when the workload suddenly aquries a lot of physical memory. | ||
565 | * | ||
566 | * In both cases, the guest OS will suffer from memory pressure and | ||
567 | * OOM killer may be triggered. | ||
568 | * By reserving extra 10% of total ram pages, we can keep the system | ||
569 | * much more reliably and response faster in some cases. | ||
570 | */ | ||
571 | if (!selfballoon_reserved_mb) { | ||
572 | reserve_pages = totalram_pages() / 10; | ||
573 | selfballoon_reserved_mb = PAGES2MB(reserve_pages); | ||
574 | } | ||
575 | schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); | ||
576 | |||
577 | return 0; | ||
578 | } | ||
579 | EXPORT_SYMBOL(xen_selfballoon_init); | ||