diff options
author | Dan Magenheimer <dan.magenheimer@oracle.com> | 2011-09-27 10:47:58 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2011-10-14 12:36:08 -0400 |
commit | 38a1ed4f039db32b418007ac365076cf53647ebd (patch) | |
tree | 396d6b42817baa68d753f23a9784e81c5e23864d /drivers/xen/xen-selfballoon.c | |
parent | 1f1503ba096d3a394d1454dac77467092ca996e6 (diff) |
xen: Fix selfballooning and ensure it doesn't go too far
The balloon driver's "current_pages" is very different from
totalram_pages. Self-ballooning needs to be driven by
the latter. Also, Committed_AS doesn't account for pages
used by the kernel so:
1) Add totalreserve_pages to Committed_AS for the normal target.
2) Enforce a floor for when there are few or no user-space threads
using memory (e.g. single-user mode) to avoid OOMs. The floor
function includes a "min_usable_mb" tuneable in case we discover
later that the floor function is still too aggressive in some
workloads, though likely it will not be needed.
Changes since version 4:
- change floor calculation so that it is not as aggressive; this version
uses a piecewise linear function similar to minimum_target in the 2.6.18
balloon driver, but modified to add to totalreserve_pages instead of
subtracting from max_pfn. The 2.6.18 version causes OOMs on recent kernels
because the kernel has expanded over time
- change safety_margin to min_usable_mb and comment on its use
- since committed_as does NOT include kernel space (and other reserved
pages), totalreserve_pages is now added to committed_as. The result is
less aggressive self-ballooning, but theoretically more appropriate.
Changes since version 3:
- missing include causes compile problem when CONFIG_FRONTSWAP is disabled
- add comments after includes
Changes since version 2:
- missing include causes compile problem only on 32-bit
Changes since version 1:
- tuneable safety margin added
[v5: avi.miller@oracle.com: still too aggressive, seeing some OOMs]
[v4: konrad.wilk@oracle.com: fix compile when CONFIG_FRONTSWAP is disabled]
[v3: guru.anbalagane@oracle.com: fix 32-bit compile]
[v2: konrad.wilk@oracle.com: make safety margin tuneable]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
[v1: Altered description and added an extra include]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'drivers/xen/xen-selfballoon.c')
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 67 |
1 files changed, 63 insertions, 4 deletions
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 1b4afd81f872..ff3f2e423af4 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c | |||
@@ -68,6 +68,8 @@ | |||
68 | */ | 68 | */ |
69 | 69 | ||
70 | #include <linux/kernel.h> | 70 | #include <linux/kernel.h> |
71 | #include <linux/bootmem.h> | ||
72 | #include <linux/swap.h> | ||
71 | #include <linux/mm.h> | 73 | #include <linux/mm.h> |
72 | #include <linux/mman.h> | 74 | #include <linux/mman.h> |
73 | #include <linux/workqueue.h> | 75 | #include <linux/workqueue.h> |
@@ -92,6 +94,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1; | |||
92 | /* In HZ, controls frequency of worker invocation. */ | 94 | /* In HZ, controls frequency of worker invocation. */ |
93 | static unsigned int selfballoon_interval __read_mostly = 5; | 95 | static unsigned int selfballoon_interval __read_mostly = 5; |
94 | 96 | ||
97 | /* | ||
98 | * Minimum usable RAM in MB for selfballooning target for balloon. | ||
99 | * If non-zero, it is added to totalreserve_pages and self-ballooning | ||
100 | * will not balloon below the sum. If zero, a piecewise linear function | ||
101 | * is calculated as a minimum and added to totalreserve_pages. Note that | ||
102 | * setting this value indiscriminately may cause OOMs and crashes. | ||
103 | */ | ||
104 | static unsigned int selfballoon_min_usable_mb; | ||
105 | |||
95 | static void selfballoon_process(struct work_struct *work); | 106 | static void selfballoon_process(struct work_struct *work); |
96 | static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); | 107 | static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); |
97 | 108 | ||
@@ -188,20 +199,23 @@ static int __init xen_selfballooning_setup(char *s) | |||
188 | __setup("selfballooning", xen_selfballooning_setup); | 199 | __setup("selfballooning", xen_selfballooning_setup); |
189 | #endif /* CONFIG_FRONTSWAP */ | 200 | #endif /* CONFIG_FRONTSWAP */ |
190 | 201 | ||
202 | #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) | ||
203 | |||
191 | /* | 204 | /* |
192 | * Use current balloon size, the goal (vm_committed_as), and hysteresis | 205 | * Use current balloon size, the goal (vm_committed_as), and hysteresis |
193 | * parameters to set a new target balloon size | 206 | * parameters to set a new target balloon size |
194 | */ | 207 | */ |
195 | static void selfballoon_process(struct work_struct *work) | 208 | static void selfballoon_process(struct work_struct *work) |
196 | { | 209 | { |
197 | unsigned long cur_pages, goal_pages, tgt_pages; | 210 | unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; |
211 | unsigned long useful_pages; | ||
198 | bool reset_timer = false; | 212 | bool reset_timer = false; |
199 | 213 | ||
200 | if (xen_selfballooning_enabled) { | 214 | if (xen_selfballooning_enabled) { |
201 | cur_pages = balloon_stats.current_pages; | 215 | cur_pages = totalram_pages; |
202 | tgt_pages = cur_pages; /* default is no change */ | 216 | tgt_pages = cur_pages; /* default is no change */ |
203 | goal_pages = percpu_counter_read_positive(&vm_committed_as) + | 217 | goal_pages = percpu_counter_read_positive(&vm_committed_as) + |
204 | balloon_stats.current_pages - totalram_pages; | 218 | totalreserve_pages; |
205 | #ifdef CONFIG_FRONTSWAP | 219 | #ifdef CONFIG_FRONTSWAP |
206 | /* allow space for frontswap pages to be repatriated */ | 220 | /* allow space for frontswap pages to be repatriated */ |
207 | if (frontswap_selfshrinking && frontswap_enabled) | 221 | if (frontswap_selfshrinking && frontswap_enabled) |
@@ -216,7 +230,26 @@ static void selfballoon_process(struct work_struct *work) | |||
216 | ((goal_pages - cur_pages) / | 230 | ((goal_pages - cur_pages) / |
217 | selfballoon_uphysteresis); | 231 | selfballoon_uphysteresis); |
218 | /* else if cur_pages == goal_pages, no change */ | 232 | /* else if cur_pages == goal_pages, no change */ |
219 | balloon_set_new_target(tgt_pages); | 233 | useful_pages = max_pfn - totalreserve_pages; |
234 | if (selfballoon_min_usable_mb != 0) | ||
235 | floor_pages = totalreserve_pages + | ||
236 | MB2PAGES(selfballoon_min_usable_mb); | ||
237 | /* piecewise linear function ending in ~3% slope */ | ||
238 | else if (useful_pages < MB2PAGES(16)) | ||
239 | floor_pages = max_pfn; /* not worth ballooning */ | ||
240 | else if (useful_pages < MB2PAGES(64)) | ||
241 | floor_pages = totalreserve_pages + MB2PAGES(16) + | ||
242 | ((useful_pages - MB2PAGES(16)) >> 1); | ||
243 | else if (useful_pages < MB2PAGES(512)) | ||
244 | floor_pages = totalreserve_pages + MB2PAGES(40) + | ||
245 | ((useful_pages - MB2PAGES(40)) >> 3); | ||
246 | else /* useful_pages >= MB2PAGES(512) */ | ||
247 | floor_pages = totalreserve_pages + MB2PAGES(99) + | ||
248 | ((useful_pages - MB2PAGES(99)) >> 5); | ||
249 | if (tgt_pages < floor_pages) | ||
250 | tgt_pages = floor_pages; | ||
251 | balloon_set_new_target(tgt_pages + | ||
252 | balloon_stats.current_pages - totalram_pages); | ||
220 | reset_timer = true; | 253 | reset_timer = true; |
221 | } | 254 | } |
222 | #ifdef CONFIG_FRONTSWAP | 255 | #ifdef CONFIG_FRONTSWAP |
@@ -339,6 +372,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev, | |||
339 | static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, | 372 | static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, |
340 | show_selfballoon_uphys, store_selfballoon_uphys); | 373 | show_selfballoon_uphys, store_selfballoon_uphys); |
341 | 374 | ||
375 | SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", | ||
376 | selfballoon_min_usable_mb); | ||
377 | |||
378 | static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev, | ||
379 | struct sysdev_attribute *attr, | ||
380 | const char *buf, | ||
381 | size_t count) | ||
382 | { | ||
383 | unsigned long val; | ||
384 | int err; | ||
385 | |||
386 | if (!capable(CAP_SYS_ADMIN)) | ||
387 | return -EPERM; | ||
388 | err = strict_strtoul(buf, 10, &val); | ||
389 | if (err || val == 0) | ||
390 | return -EINVAL; | ||
391 | selfballoon_min_usable_mb = val; | ||
392 | return count; | ||
393 | } | ||
394 | |||
395 | static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, | ||
396 | show_selfballoon_min_usable_mb, | ||
397 | store_selfballoon_min_usable_mb); | ||
398 | |||
399 | |||
342 | #ifdef CONFIG_FRONTSWAP | 400 | #ifdef CONFIG_FRONTSWAP |
343 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); | 401 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); |
344 | 402 | ||
@@ -420,6 +478,7 @@ static struct attribute *selfballoon_attrs[] = { | |||
420 | &attr_selfballoon_interval.attr, | 478 | &attr_selfballoon_interval.attr, |
421 | &attr_selfballoon_downhysteresis.attr, | 479 | &attr_selfballoon_downhysteresis.attr, |
422 | &attr_selfballoon_uphysteresis.attr, | 480 | &attr_selfballoon_uphysteresis.attr, |
481 | &attr_selfballoon_min_usable_mb.attr, | ||
423 | #ifdef CONFIG_FRONTSWAP | 482 | #ifdef CONFIG_FRONTSWAP |
424 | &attr_frontswap_selfshrinking.attr, | 483 | &attr_frontswap_selfshrinking.attr, |
425 | &attr_frontswap_hysteresis.attr, | 484 | &attr_frontswap_hysteresis.attr, |