aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Torokhov <dtor@vmware.com>2010-04-23 13:18:08 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-04-24 14:31:26 -0400
commit453dc65931915abc61f92e12bba1fc4747ff5542 (patch)
tree10700e010416253dfee19748f892466ac9e98f96
parentb8af67e2681c693a21f3933e3bdfce4cf66596d3 (diff)
VMware Balloon driver
This is a standalone version of VMware Balloon driver. Ballooning is a technique that allows hypervisor dynamically limit the amount of memory available to the guest (with guest cooperation). In the overcommit scenario, when hypervisor set detects that it needs to shuffle some memory, it instructs the driver to allocate certain number of pages, and the underlying memory gets returned to the hypervisor. Later hypervisor may return memory to the guest by reattaching memory to the pageframes and instructing the driver to "deflate" balloon. We are submitting a standalone driver because KVM maintainer (Avi Kivity) expressed opinion (rightly) that our transport does not fit well into virtqueue paradigm and thus it does not make much sense to integrate with virtio. There were also some concerns whether current ballooning technique is the right thing. If there appears a better framework to achieve this we are prepared to evaluate and switch to using it, but in the meantime we'd like to get this driver upstream. We want to get the driver accepted in distributions so that users do not have to deal with an out-of-tree module and many distributions have "upstream first" requirement. The driver has been shipping for a number of years and users running on VMware platform will have it installed as part of VMware Tools even if it will not come from a distribution, thus there should not be additional risk in pulling the driver into mainline. The driver will only activate if host is VMware so everyone else should not be affected at all. Signed-off-by: Dmitry Torokhov <dtor@vmware.com> Cc: Avi Kivity <avi@redhat.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--drivers/misc/Kconfig16
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/vmware_balloon.c832
4 files changed, 851 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59cf..dfdb4dba2320 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
22 */ 22 */
23 23
24#include <linux/dmi.h> 24#include <linux/dmi.h>
25#include <linux/module.h>
25#include <asm/div64.h> 26#include <asm/div64.h>
26#include <asm/vmware.h> 27#include <asm/vmware.h>
27#include <asm/x86_init.h> 28#include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
101 102
102 return 0; 103 return 0;
103} 104}
105EXPORT_SYMBOL(vmware_platform);
104 106
105/* 107/*
106 * VMware hypervisor takes care of exporting a reliable TSC to the guest. 108 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 2191c8d896a0..0d0d625fece2 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -311,6 +311,22 @@ config TI_DAC7512
311 This driver can also be built as a module. If so, the module 311 This driver can also be built as a module. If so, the module
312 will be calles ti_dac7512. 312 will be calles ti_dac7512.
313 313
314config VMWARE_BALLOON
315 tristate "VMware Balloon Driver"
316 depends on X86
317 help
318 This is VMware physical memory management driver which acts
319 like a "balloon" that can be inflated to reclaim physical pages
320 by reserving them in the guest and invalidating them in the
321 monitor, freeing up the underlying machine pages so they can
322 be allocated to other guests. The balloon can also be deflated
323 to allow the guest to use more physical memory.
324
325 If unsure, say N.
326
327 To compile this driver as a module, choose M here: the
328 module will be called vmware_balloon.
329
314source "drivers/misc/c2port/Kconfig" 330source "drivers/misc/c2port/Kconfig"
315source "drivers/misc/eeprom/Kconfig" 331source "drivers/misc/eeprom/Kconfig"
316source "drivers/misc/cb710/Kconfig" 332source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 27c484355414..7b6f7eefdf8d 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_C2PORT) += c2port/
29obj-$(CONFIG_IWMC3200TOP) += iwmc3200top/ 29obj-$(CONFIG_IWMC3200TOP) += iwmc3200top/
30obj-y += eeprom/ 30obj-y += eeprom/
31obj-y += cb710/ 31obj-y += cb710/
32obj-$(CONFIG_VMWARE_BALLOON) += vmware_balloon.o
diff --git a/drivers/misc/vmware_balloon.c b/drivers/misc/vmware_balloon.c
new file mode 100644
index 000000000000..e7161c4e3798
--- /dev/null
+++ b/drivers/misc/vmware_balloon.c
@@ -0,0 +1,832 @@
1/*
2 * VMware Balloon driver.
3 *
4 * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Maintained by: Dmitry Torokhov <dtor@vmware.com>
21 */
22
23/*
24 * This is VMware physical memory management driver for Linux. The driver
25 * acts like a "balloon" that can be inflated to reclaim physical pages by
26 * reserving them in the guest and invalidating them in the monitor,
27 * freeing up the underlying machine pages so they can be allocated to
28 * other guests. The balloon can also be deflated to allow the guest to
29 * use more physical memory. Higher level policies can control the sizes
30 * of balloons in VMs in order to manage physical memory resources.
31 */
32
33//#define DEBUG
34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/mm.h>
39#include <linux/sched.h>
40#include <linux/module.h>
41#include <linux/workqueue.h>
42#include <linux/debugfs.h>
43#include <linux/seq_file.h>
44#include <asm/vmware.h>
45
46MODULE_AUTHOR("VMware, Inc.");
47MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
48MODULE_VERSION("1.2.1.0-K");
49MODULE_ALIAS("dmi:*:svnVMware*:*");
50MODULE_ALIAS("vmware_vmmemctl");
51MODULE_LICENSE("GPL");
52
53/*
54 * Various constants controlling rate of inflaint/deflating balloon,
55 * measured in pages.
56 */
57
58/*
59 * Rate of allocating memory when there is no memory pressure
60 * (driver performs non-sleeping allocations).
61 */
62#define VMW_BALLOON_NOSLEEP_ALLOC_MAX 16384U
63
64/*
65 * Rates of memory allocaton when guest experiences memory pressure
66 * (driver performs sleeping allocations).
67 */
68#define VMW_BALLOON_RATE_ALLOC_MIN 512U
69#define VMW_BALLOON_RATE_ALLOC_MAX 2048U
70#define VMW_BALLOON_RATE_ALLOC_INC 16U
71
72/*
73 * Rates for releasing pages while deflating balloon.
74 */
75#define VMW_BALLOON_RATE_FREE_MIN 512U
76#define VMW_BALLOON_RATE_FREE_MAX 16384U
77#define VMW_BALLOON_RATE_FREE_INC 16U
78
79/*
80 * When guest is under memory pressure, use a reduced page allocation
81 * rate for next several cycles.
82 */
83#define VMW_BALLOON_SLOW_CYCLES 4
84
85/*
86 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
87 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
88 * __GFP_NOWARN, to suppress page allocation failure warnings.
89 */
90#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN)
91
92/*
93 * Use GFP_HIGHUSER when executing in a separate kernel thread
94 * context and allocation can sleep. This is less stressful to
95 * the guest memory system, since it allows the thread to block
96 * while memory is reclaimed, and won't take pages from emergency
97 * low-memory pools.
98 */
99#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER)
100
101/* Maximum number of page allocations without yielding processor */
102#define VMW_BALLOON_YIELD_THRESHOLD 1024
103
104
105/*
106 * Hypervisor communication port definitions.
107 */
108#define VMW_BALLOON_HV_PORT 0x5670
109#define VMW_BALLOON_HV_MAGIC 0x456c6d6f
110#define VMW_BALLOON_PROTOCOL_VERSION 2
111#define VMW_BALLOON_GUEST_ID 1 /* Linux */
112
113#define VMW_BALLOON_CMD_START 0
114#define VMW_BALLOON_CMD_GET_TARGET 1
115#define VMW_BALLOON_CMD_LOCK 2
116#define VMW_BALLOON_CMD_UNLOCK 3
117#define VMW_BALLOON_CMD_GUEST_ID 4
118
119/* error codes */
120#define VMW_BALLOON_SUCCESS 0
121#define VMW_BALLOON_FAILURE -1
122#define VMW_BALLOON_ERROR_CMD_INVALID 1
123#define VMW_BALLOON_ERROR_PPN_INVALID 2
124#define VMW_BALLOON_ERROR_PPN_LOCKED 3
125#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4
126#define VMW_BALLOON_ERROR_PPN_PINNED 5
127#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6
128#define VMW_BALLOON_ERROR_RESET 7
129#define VMW_BALLOON_ERROR_BUSY 8
130
131#define VMWARE_BALLOON_CMD(cmd, data, result) \
132({ \
133 unsigned long __stat, __dummy1, __dummy2; \
134 __asm__ __volatile__ ("inl (%%dx)" : \
135 "=a"(__stat), \
136 "=c"(__dummy1), \
137 "=d"(__dummy2), \
138 "=b"(result) : \
139 "0"(VMW_BALLOON_HV_MAGIC), \
140 "1"(VMW_BALLOON_CMD_##cmd), \
141 "2"(VMW_BALLOON_HV_PORT), \
142 "3"(data) : \
143 "memory"); \
144 result &= -1UL; \
145 __stat & -1UL; \
146})
147
148#ifdef CONFIG_DEBUG_FS
149struct vmballoon_stats {
150 unsigned int timer;
151
152 /* allocation statustics */
153 unsigned int alloc;
154 unsigned int alloc_fail;
155 unsigned int sleep_alloc;
156 unsigned int sleep_alloc_fail;
157 unsigned int refused_alloc;
158 unsigned int refused_free;
159 unsigned int free;
160
161 /* monitor operations */
162 unsigned int lock;
163 unsigned int lock_fail;
164 unsigned int unlock;
165 unsigned int unlock_fail;
166 unsigned int target;
167 unsigned int target_fail;
168 unsigned int start;
169 unsigned int start_fail;
170 unsigned int guest_type;
171 unsigned int guest_type_fail;
172};
173
174#define STATS_INC(stat) (stat)++
175#else
176#define STATS_INC(stat)
177#endif
178
179struct vmballoon {
180
181 /* list of reserved physical pages */
182 struct list_head pages;
183
184 /* transient list of non-balloonable pages */
185 struct list_head refused_pages;
186
187 /* balloon size in pages */
188 unsigned int size;
189 unsigned int target;
190
191 /* reset flag */
192 bool reset_required;
193
194 /* adjustment rates (pages per second) */
195 unsigned int rate_alloc;
196 unsigned int rate_free;
197
198 /* slowdown page allocations for next few cycles */
199 unsigned int slow_allocation_cycles;
200
201#ifdef CONFIG_DEBUG_FS
202 /* statistics */
203 struct vmballoon_stats stats;
204
205 /* debugfs file exporting statistics */
206 struct dentry *dbg_entry;
207#endif
208
209 struct sysinfo sysinfo;
210
211 struct delayed_work dwork;
212};
213
214static struct vmballoon balloon;
215static struct workqueue_struct *vmballoon_wq;
216
217/*
218 * Send "start" command to the host, communicating supported version
219 * of the protocol.
220 */
221static bool vmballoon_send_start(struct vmballoon *b)
222{
223 unsigned long status, dummy;
224
225 STATS_INC(b->stats.start);
226
227 status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_PROTOCOL_VERSION, dummy);
228 if (status == VMW_BALLOON_SUCCESS)
229 return true;
230
231 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
232 STATS_INC(b->stats.start_fail);
233 return false;
234}
235
236static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
237{
238 switch (status) {
239 case VMW_BALLOON_SUCCESS:
240 return true;
241
242 case VMW_BALLOON_ERROR_RESET:
243 b->reset_required = true;
244 /* fall through */
245
246 default:
247 return false;
248 }
249}
250
251/*
252 * Communicate guest type to the host so that it can adjust ballooning
253 * algorithm to the one most appropriate for the guest. This command
254 * is normally issued after sending "start" command and is part of
255 * standard reset sequence.
256 */
257static bool vmballoon_send_guest_id(struct vmballoon *b)
258{
259 unsigned long status, dummy;
260
261 status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy);
262
263 STATS_INC(b->stats.guest_type);
264
265 if (vmballoon_check_status(b, status))
266 return true;
267
268 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
269 STATS_INC(b->stats.guest_type_fail);
270 return false;
271}
272
273/*
274 * Retrieve desired balloon size from the host.
275 */
276static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
277{
278 unsigned long status;
279 unsigned long target;
280 unsigned long limit;
281 u32 limit32;
282
283 /*
284 * si_meminfo() is cheap. Moreover, we want to provide dynamic
285 * max balloon size later. So let us call si_meminfo() every
286 * iteration.
287 */
288 si_meminfo(&b->sysinfo);
289 limit = b->sysinfo.totalram;
290
291 /* Ensure limit fits in 32-bits */
292 limit32 = (u32)limit;
293 if (limit != limit32)
294 return false;
295
296 /* update stats */
297 STATS_INC(b->stats.target);
298
299 status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target);
300 if (vmballoon_check_status(b, status)) {
301 *new_target = target;
302 return true;
303 }
304
305 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
306 STATS_INC(b->stats.target_fail);
307 return false;
308}
309
310/*
311 * Notify the host about allocated page so that host can use it without
312 * fear that guest will need it. Host may reject some pages, we need to
313 * check the return value and maybe submit a different page.
314 */
315static bool vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn)
316{
317 unsigned long status, dummy;
318 u32 pfn32;
319
320 pfn32 = (u32)pfn;
321 if (pfn32 != pfn)
322 return false;
323
324 STATS_INC(b->stats.lock);
325
326 status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy);
327 if (vmballoon_check_status(b, status))
328 return true;
329
330 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
331 STATS_INC(b->stats.lock_fail);
332 return false;
333}
334
335/*
336 * Notify the host that guest intends to release given page back into
337 * the pool of available (to the guest) pages.
338 */
339static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
340{
341 unsigned long status, dummy;
342 u32 pfn32;
343
344 pfn32 = (u32)pfn;
345 if (pfn32 != pfn)
346 return false;
347
348 STATS_INC(b->stats.unlock);
349
350 status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy);
351 if (vmballoon_check_status(b, status))
352 return true;
353
354 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
355 STATS_INC(b->stats.unlock_fail);
356 return false;
357}
358
359/*
360 * Quickly release all pages allocated for the balloon. This function is
361 * called when host decides to "reset" balloon for one reason or another.
362 * Unlike normal "deflate" we do not (shall not) notify host of the pages
363 * being released.
364 */
365static void vmballoon_pop(struct vmballoon *b)
366{
367 struct page *page, *next;
368 unsigned int count = 0;
369
370 list_for_each_entry_safe(page, next, &b->pages, lru) {
371 list_del(&page->lru);
372 __free_page(page);
373 STATS_INC(b->stats.free);
374 b->size--;
375
376 if (++count >= b->rate_free) {
377 count = 0;
378 cond_resched();
379 }
380 }
381}
382
383/*
384 * Perform standard reset sequence by popping the balloon (in case it
385 * is not empty) and then restarting protocol. This operation normally
386 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
387 */
388static void vmballoon_reset(struct vmballoon *b)
389{
390 /* free all pages, skipping monitor unlock */
391 vmballoon_pop(b);
392
393 if (vmballoon_send_start(b)) {
394 b->reset_required = false;
395 if (!vmballoon_send_guest_id(b))
396 pr_err("failed to send guest ID to the host\n");
397 }
398}
399
400/*
401 * Allocate (or reserve) a page for the balloon and notify the host. If host
402 * refuses the page put it on "refuse" list and allocate another one until host
403 * is satisfied. "Refused" pages are released at the end of inflation cycle
404 * (when we allocate b->rate_alloc pages).
405 */
406static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
407{
408 struct page *page;
409 gfp_t flags;
410 bool locked = false;
411
412 do {
413 if (!can_sleep)
414 STATS_INC(b->stats.alloc);
415 else
416 STATS_INC(b->stats.sleep_alloc);
417
418 flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
419 page = alloc_page(flags);
420 if (!page) {
421 if (!can_sleep)
422 STATS_INC(b->stats.alloc_fail);
423 else
424 STATS_INC(b->stats.sleep_alloc_fail);
425 return -ENOMEM;
426 }
427
428 /* inform monitor */
429 locked = vmballoon_send_lock_page(b, page_to_pfn(page));
430 if (!locked) {
431 if (b->reset_required) {
432 __free_page(page);
433 return -EIO;
434 }
435
436 /* place on list of non-balloonable pages, retry allocation */
437 list_add(&page->lru, &b->refused_pages);
438 STATS_INC(b->stats.refused_alloc);
439 }
440 } while (!locked);
441
442 /* track allocated page */
443 list_add(&page->lru, &b->pages);
444
445 /* update balloon size */
446 b->size++;
447
448 return 0;
449}
450
451/*
452 * Release the page allocated for the balloon. Note that we first notify
453 * the host so it can make sure the page will be available for the guest
454 * to use, if needed.
455 */
456static int vmballoon_release_page(struct vmballoon *b, struct page *page)
457{
458 if (!vmballoon_send_unlock_page(b, page_to_pfn(page)))
459 return -EIO;
460
461 list_del(&page->lru);
462
463 /* deallocate page */
464 __free_page(page);
465 STATS_INC(b->stats.free);
466
467 /* update balloon size */
468 b->size--;
469
470 return 0;
471}
472
473/*
474 * Release pages that were allocated while attempting to inflate the
475 * balloon but were refused by the host for one reason or another.
476 */
477static void vmballoon_release_refused_pages(struct vmballoon *b)
478{
479 struct page *page, *next;
480
481 list_for_each_entry_safe(page, next, &b->refused_pages, lru) {
482 list_del(&page->lru);
483 __free_page(page);
484 STATS_INC(b->stats.refused_free);
485 }
486}
487
488/*
489 * Inflate the balloon towards its target size. Note that we try to limit
490 * the rate of allocation to make sure we are not choking the rest of the
491 * system.
492 */
493static void vmballoon_inflate(struct vmballoon *b)
494{
495 unsigned int goal;
496 unsigned int rate;
497 unsigned int i;
498 unsigned int allocations = 0;
499 int error = 0;
500 bool alloc_can_sleep = false;
501
502 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
503
504 /*
505 * First try NOSLEEP page allocations to inflate balloon.
506 *
507 * If we do not throttle nosleep allocations, we can drain all
508 * free pages in the guest quickly (if the balloon target is high).
509 * As a side-effect, draining free pages helps to inform (force)
510 * the guest to start swapping if balloon target is not met yet,
511 * which is a desired behavior. However, balloon driver can consume
512 * all available CPU cycles if too many pages are allocated in a
513 * second. Therefore, we throttle nosleep allocations even when
514 * the guest is not under memory pressure. OTOH, if we have already
515 * predicted that the guest is under memory pressure, then we
516 * slowdown page allocations considerably.
517 */
518
519 goal = b->target - b->size;
520 /*
521 * Start with no sleep allocation rate which may be higher
522 * than sleeping allocation rate.
523 */
524 rate = b->slow_allocation_cycles ?
525 b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX;
526
527 pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n",
528 __func__, goal, rate, b->rate_alloc);
529
530 for (i = 0; i < goal; i++) {
531
532 error = vmballoon_reserve_page(b, alloc_can_sleep);
533 if (error) {
534 if (error != -ENOMEM) {
535 /*
536 * Not a page allocation failure, stop this
537 * cycle. Maybe we'll get new target from
538 * the host soon.
539 */
540 break;
541 }
542
543 if (alloc_can_sleep) {
544 /*
545 * CANSLEEP page allocation failed, so guest
546 * is under severe memory pressure. Quickly
547 * decrease allocation rate.
548 */
549 b->rate_alloc = max(b->rate_alloc / 2,
550 VMW_BALLOON_RATE_ALLOC_MIN);
551 break;
552 }
553
554 /*
555 * NOSLEEP page allocation failed, so the guest is
556 * under memory pressure. Let us slow down page
557 * allocations for next few cycles so that the guest
558 * gets out of memory pressure. Also, if we already
559 * allocated b->rate_alloc pages, let's pause,
560 * otherwise switch to sleeping allocations.
561 */
562 b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
563
564 if (i >= b->rate_alloc)
565 break;
566
567 alloc_can_sleep = true;
568 /* Lower rate for sleeping allocations. */
569 rate = b->rate_alloc;
570 }
571
572 if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) {
573 cond_resched();
574 allocations = 0;
575 }
576
577 if (i >= rate) {
578 /* We allocated enough pages, let's take a break. */
579 break;
580 }
581 }
582
583 /*
584 * We reached our goal without failures so try increasing
585 * allocation rate.
586 */
587 if (error == 0 && i >= b->rate_alloc) {
588 unsigned int mult = i / b->rate_alloc;
589
590 b->rate_alloc =
591 min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
592 VMW_BALLOON_RATE_ALLOC_MAX);
593 }
594
595 vmballoon_release_refused_pages(b);
596}
597
598/*
599 * Decrease the size of the balloon allowing guest to use more memory.
600 */
601static void vmballoon_deflate(struct vmballoon *b)
602{
603 struct page *page, *next;
604 unsigned int i = 0;
605 unsigned int goal;
606 int error;
607
608 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
609
610 /* limit deallocation rate */
611 goal = min(b->size - b->target, b->rate_free);
612
613 pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free);
614
615 /* free pages to reach target */
616 list_for_each_entry_safe(page, next, &b->pages, lru) {
617 error = vmballoon_release_page(b, page);
618 if (error) {
619 /* quickly decrease rate in case of error */
620 b->rate_free = max(b->rate_free / 2,
621 VMW_BALLOON_RATE_FREE_MIN);
622 return;
623 }
624
625 if (++i >= goal)
626 break;
627 }
628
629 /* slowly increase rate if there were no errors */
630 b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC,
631 VMW_BALLOON_RATE_FREE_MAX);
632}
633
634/*
635 * Balloon work function: reset protocol, if needed, get the new size and
636 * adjust balloon as needed. Repeat in 1 sec.
637 */
638static void vmballoon_work(struct work_struct *work)
639{
640 struct delayed_work *dwork = to_delayed_work(work);
641 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
642 unsigned int target;
643
644 STATS_INC(b->stats.timer);
645
646 if (b->reset_required)
647 vmballoon_reset(b);
648
649 if (b->slow_allocation_cycles > 0)
650 b->slow_allocation_cycles--;
651
652 if (vmballoon_send_get_target(b, &target)) {
653 /* update target, adjust size */
654 b->target = target;
655
656 if (b->size < target)
657 vmballoon_inflate(b);
658 else if (b->size > target)
659 vmballoon_deflate(b);
660 }
661
662 queue_delayed_work(vmballoon_wq, dwork, round_jiffies_relative(HZ));
663}
664
665/*
666 * DEBUGFS Interface
667 */
668#ifdef CONFIG_DEBUG_FS
669
670static int vmballoon_debug_show(struct seq_file *f, void *offset)
671{
672 struct vmballoon *b = f->private;
673 struct vmballoon_stats *stats = &b->stats;
674
675 /* format size info */
676 seq_printf(f,
677 "target: %8d pages\n"
678 "current: %8d pages\n",
679 b->target, b->size);
680
681 /* format rate info */
682 seq_printf(f,
683 "rateNoSleepAlloc: %8d pages/sec\n"
684 "rateSleepAlloc: %8d pages/sec\n"
685 "rateFree: %8d pages/sec\n",
686 VMW_BALLOON_NOSLEEP_ALLOC_MAX,
687 b->rate_alloc, b->rate_free);
688
689 seq_printf(f,
690 "\n"
691 "timer: %8u\n"
692 "start: %8u (%4u failed)\n"
693 "guestType: %8u (%4u failed)\n"
694 "lock: %8u (%4u failed)\n"
695 "unlock: %8u (%4u failed)\n"
696 "target: %8u (%4u failed)\n"
697 "primNoSleepAlloc: %8u (%4u failed)\n"
698 "primCanSleepAlloc: %8u (%4u failed)\n"
699 "primFree: %8u\n"
700 "errAlloc: %8u\n"
701 "errFree: %8u\n",
702 stats->timer,
703 stats->start, stats->start_fail,
704 stats->guest_type, stats->guest_type_fail,
705 stats->lock, stats->lock_fail,
706 stats->unlock, stats->unlock_fail,
707 stats->target, stats->target_fail,
708 stats->alloc, stats->alloc_fail,
709 stats->sleep_alloc, stats->sleep_alloc_fail,
710 stats->free,
711 stats->refused_alloc, stats->refused_free);
712
713 return 0;
714}
715
716static int vmballoon_debug_open(struct inode *inode, struct file *file)
717{
718 return single_open(file, vmballoon_debug_show, inode->i_private);
719}
720
721static const struct file_operations vmballoon_debug_fops = {
722 .owner = THIS_MODULE,
723 .open = vmballoon_debug_open,
724 .read = seq_read,
725 .llseek = seq_lseek,
726 .release = single_release,
727};
728
729static int __init vmballoon_debugfs_init(struct vmballoon *b)
730{
731 int error;
732
733 b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
734 &vmballoon_debug_fops);
735 if (IS_ERR(b->dbg_entry)) {
736 error = PTR_ERR(b->dbg_entry);
737 pr_err("failed to create debugfs entry, error: %d\n", error);
738 return error;
739 }
740
741 return 0;
742}
743
744static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
745{
746 debugfs_remove(b->dbg_entry);
747}
748
749#else
750
751static inline int vmballoon_debugfs_init(struct vmballoon *b)
752{
753 return 0;
754}
755
756static inline void vmballoon_debugfs_exit(struct vmballoon *b)
757{
758}
759
760#endif /* CONFIG_DEBUG_FS */
761
762static int __init vmballoon_init(void)
763{
764 int error;
765
766 /*
767 * Check if we are running on VMware's hypervisor and bail out
768 * if we are not.
769 */
770 if (!vmware_platform())
771 return -ENODEV;
772
773 vmballoon_wq = create_freezeable_workqueue("vmmemctl");
774 if (!vmballoon_wq) {
775 pr_err("failed to create workqueue\n");
776 return -ENOMEM;
777 }
778
779 INIT_LIST_HEAD(&balloon.pages);
780 INIT_LIST_HEAD(&balloon.refused_pages);
781
782 /* initialize rates */
783 balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
784 balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;
785
786 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
787
788 /*
789 * Start balloon.
790 */
791 if (!vmballoon_send_start(&balloon)) {
792 pr_err("failed to send start command to the host\n");
793 error = -EIO;
794 goto fail;
795 }
796
797 if (!vmballoon_send_guest_id(&balloon)) {
798 pr_err("failed to send guest ID to the host\n");
799 error = -EIO;
800 goto fail;
801 }
802
803 error = vmballoon_debugfs_init(&balloon);
804 if (error)
805 goto fail;
806
807 queue_delayed_work(vmballoon_wq, &balloon.dwork, 0);
808
809 return 0;
810
811fail:
812 destroy_workqueue(vmballoon_wq);
813 return error;
814}
815module_init(vmballoon_init);
816
817static void __exit vmballoon_exit(void)
818{
819 cancel_delayed_work_sync(&balloon.dwork);
820 destroy_workqueue(vmballoon_wq);
821
822 vmballoon_debugfs_exit(&balloon);
823
824 /*
825 * Deallocate all reserved memory, and reset connection with monitor.
826 * Reset connection before deallocating memory to avoid potential for
827 * additional spurious resets from guest touching deallocated pages.
828 */
829 vmballoon_send_start(&balloon);
830 vmballoon_pop(&balloon);
831}
832module_exit(vmballoon_exit);