aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig24
-rw-r--r--drivers/xen/Makefile3
-rw-r--r--drivers/xen/tmem.c170
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--drivers/xen/xen-selfballoon.c485
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c44
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h2
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c9
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c6
9 files changed, 694 insertions, 51 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 9b700b4a987a..03bc471c3eed 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -9,6 +9,23 @@ config XEN_BALLOON
9 the system to expand the domain's memory allocation, or alternatively 9 the system to expand the domain's memory allocation, or alternatively
10 return unneeded memory to the system. 10 return unneeded memory to the system.
11 11
12config XEN_SELFBALLOONING
13 bool "Dynamically self-balloon kernel memory to target"
14 depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP
15 default n
16 help
17 Self-ballooning dynamically balloons available kernel memory driven
18 by the current usage of anonymous memory ("committed AS") and
19 controlled by various sysfs-settable parameters. Configuring
20 FRONTSWAP is highly recommended; if it is not configured, self-
21 ballooning is disabled by default but can be enabled with the
22 'selfballooning' kernel boot parameter. If FRONTSWAP is configured,
23 frontswap-selfshrinking is enabled by default but can be disabled
24 with the 'noselfshrink' kernel boot parameter; and self-ballooning
25 is enabled by default but can be disabled with the 'noselfballooning'
26 kernel boot parameter. Note that systems without a sufficiently
27 large swap device should not enable self-ballooning.
28
12config XEN_SCRUB_PAGES 29config XEN_SCRUB_PAGES
13 bool "Scrub pages before returning them to system" 30 bool "Scrub pages before returning them to system"
14 depends on XEN_BALLOON 31 depends on XEN_BALLOON
@@ -105,6 +122,13 @@ config SWIOTLB_XEN
105 depends on PCI 122 depends on PCI
106 select SWIOTLB 123 select SWIOTLB
107 124
125config XEN_TMEM
126 bool
127 default y if (CLEANCACHE || FRONTSWAP)
128 help
129 Shim to interface in-kernel Transcendent Memory hooks
130 (e.g. cleancache and frontswap) to Xen tmem hypercalls.
131
108config XEN_PCIDEV_BACKEND 132config XEN_PCIDEV_BACKEND
109 tristate "Xen PCI-device backend driver" 133 tristate "Xen PCI-device backend driver"
110 depends on PCI && X86 && XEN 134 depends on PCI && X86 && XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 35a72ef3afac..72bbb27d7a68 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,6 +1,5 @@
1obj-y += grant-table.o features.o events.o manage.o balloon.o 1obj-y += grant-table.o features.o events.o manage.o balloon.o
2obj-y += xenbus/ 2obj-y += xenbus/
3obj-y += tmem.o
4 3
5nostackp := $(call cc-option, -fno-stack-protector) 4nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_features.o := $(nostackp) 5CFLAGS_features.o := $(nostackp)
@@ -9,12 +8,14 @@ obj-$(CONFIG_BLOCK) += biomerge.o
9obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 8obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
10obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 9obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
11obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o 10obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
11obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
12obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o 12obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
13obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o 13obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
14obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o 14obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
15obj-$(CONFIG_XENFS) += xenfs/ 15obj-$(CONFIG_XENFS) += xenfs/
16obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o 16obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
17obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o 17obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
18obj-$(CONFIG_XEN_TMEM) += tmem.o
18obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o 19obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
19obj-$(CONFIG_XEN_DOM0) += pci.o 20obj-$(CONFIG_XEN_DOM0) += pci.o
20obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ 21obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 816a44959ef0..d369965e8f8a 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Xen implementation for transcendent memory (tmem) 2 * Xen implementation for transcendent memory (tmem)
3 * 3 *
4 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. 4 * Copyright (C) 2009-2011 Oracle Corp. All rights reserved.
5 * Author: Dan Magenheimer 5 * Author: Dan Magenheimer
6 */ 6 */
7 7
@@ -9,8 +9,14 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/module.h>
12#include <linux/cleancache.h> 13#include <linux/cleancache.h>
13 14
15/* temporary ifdef until include/linux/frontswap.h is upstream */
16#ifdef CONFIG_FRONTSWAP
17#include <linux/frontswap.h>
18#endif
19
14#include <xen/xen.h> 20#include <xen/xen.h>
15#include <xen/interface/xen.h> 21#include <xen/interface/xen.h>
16#include <asm/xen/hypercall.h> 22#include <asm/xen/hypercall.h>
@@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
122 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); 128 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
123} 129}
124 130
125static int xen_tmem_destroy_pool(u32 pool_id) 131int tmem_enabled __read_mostly;
126{ 132EXPORT_SYMBOL(tmem_enabled);
127 struct tmem_oid oid = { { 0 } };
128
129 return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
130}
131
132int tmem_enabled;
133 133
134static int __init enable_tmem(char *s) 134static int __init enable_tmem(char *s)
135{ 135{
@@ -139,6 +139,14 @@ static int __init enable_tmem(char *s)
139 139
140__setup("tmem", enable_tmem); 140__setup("tmem", enable_tmem);
141 141
142#ifdef CONFIG_CLEANCACHE
143static int xen_tmem_destroy_pool(u32 pool_id)
144{
145 struct tmem_oid oid = { { 0 } };
146
147 return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
148}
149
142/* cleancache ops */ 150/* cleancache ops */
143 151
144static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, 152static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
@@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = {
240 .init_shared_fs = tmem_cleancache_init_shared_fs, 248 .init_shared_fs = tmem_cleancache_init_shared_fs,
241 .init_fs = tmem_cleancache_init_fs 249 .init_fs = tmem_cleancache_init_fs
242}; 250};
251#endif
243 252
244static int __init xen_tmem_init(void) 253#ifdef CONFIG_FRONTSWAP
254/* frontswap tmem operations */
255
256/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
257static int tmem_frontswap_poolid;
258
259/*
260 * Swizzling increases objects per swaptype, increasing tmem concurrency
261 * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
262 */
263#define SWIZ_BITS 4
264#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
265#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
266#define iswiz(_ind) (_ind >> SWIZ_BITS)
267
268static inline struct tmem_oid oswiz(unsigned type, u32 ind)
245{ 269{
246 struct cleancache_ops old_ops; 270 struct tmem_oid oid = { .oid = { 0 } };
271 oid.oid[0] = _oswiz(type, ind);
272 return oid;
273}
247 274
275/* returns 0 if the page was successfully put into frontswap, -1 if not */
276static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
277 struct page *page)
278{
279 u64 ind64 = (u64)offset;
280 u32 ind = (u32)offset;
281 unsigned long pfn = page_to_pfn(page);
282 int pool = tmem_frontswap_poolid;
283 int ret;
284
285 if (pool < 0)
286 return -1;
287 if (ind64 != ind)
288 return -1;
289 mb(); /* ensure page is quiescent; tmem may address it with an alias */
290 ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn);
291 /* translate Xen tmem return values to linux semantics */
292 if (ret == 1)
293 return 0;
294 else
295 return -1;
296}
297
298/*
299 * returns 0 if the page was successfully gotten from frontswap, -1 if it
300 * was not present (should never happen!)
301 */
302static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
303 struct page *page)
304{
305 u64 ind64 = (u64)offset;
306 u32 ind = (u32)offset;
307 unsigned long pfn = page_to_pfn(page);
308 int pool = tmem_frontswap_poolid;
309 int ret;
310
311 if (pool < 0)
312 return -1;
313 if (ind64 != ind)
314 return -1;
315 ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn);
316 /* translate Xen tmem return values to linux semantics */
317 if (ret == 1)
318 return 0;
319 else
320 return -1;
321}
322
323/* flush a single page from frontswap */
324static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
325{
326 u64 ind64 = (u64)offset;
327 u32 ind = (u32)offset;
328 int pool = tmem_frontswap_poolid;
329
330 if (pool < 0)
331 return;
332 if (ind64 != ind)
333 return;
334 (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
335}
336
337/* flush all pages from the passed swaptype */
338static void tmem_frontswap_flush_area(unsigned type)
339{
340 int pool = tmem_frontswap_poolid;
341 int ind;
342
343 if (pool < 0)
344 return;
345 for (ind = SWIZ_MASK; ind >= 0; ind--)
346 (void)xen_tmem_flush_object(pool, oswiz(type, ind));
347}
348
349static void tmem_frontswap_init(unsigned ignored)
350{
351 struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
352
353 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
354 if (tmem_frontswap_poolid < 0)
355 tmem_frontswap_poolid =
356 xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
357}
358
359static int __initdata use_frontswap = 1;
360
361static int __init no_frontswap(char *s)
362{
363 use_frontswap = 0;
364 return 1;
365}
366
367__setup("nofrontswap", no_frontswap);
368
369static struct frontswap_ops tmem_frontswap_ops = {
370 .put_page = tmem_frontswap_put_page,
371 .get_page = tmem_frontswap_get_page,
372 .flush_page = tmem_frontswap_flush_page,
373 .flush_area = tmem_frontswap_flush_area,
374 .init = tmem_frontswap_init
375};
376#endif
377
378static int __init xen_tmem_init(void)
379{
248 if (!xen_domain()) 380 if (!xen_domain())
249 return 0; 381 return 0;
382#ifdef CONFIG_FRONTSWAP
383 if (tmem_enabled && use_frontswap) {
384 char *s = "";
385 struct frontswap_ops old_ops =
386 frontswap_register_ops(&tmem_frontswap_ops);
387
388 tmem_frontswap_poolid = -1;
389 if (old_ops.init != NULL)
390 s = " (WARNING: frontswap_ops overridden)";
391 printk(KERN_INFO "frontswap enabled, RAM provided by "
392 "Xen Transcendent Memory\n");
393 }
394#endif
250#ifdef CONFIG_CLEANCACHE 395#ifdef CONFIG_CLEANCACHE
251 BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); 396 BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
252 if (tmem_enabled && use_cleancache) { 397 if (tmem_enabled && use_cleancache) {
253 char *s = ""; 398 char *s = "";
254 old_ops = cleancache_register_ops(&tmem_cleancache_ops); 399 struct cleancache_ops old_ops =
400 cleancache_register_ops(&tmem_cleancache_ops);
255 if (old_ops.init_fs != NULL) 401 if (old_ops.init_fs != NULL)
256 s = " (WARNING: cleancache_ops overridden)"; 402 s = " (WARNING: cleancache_ops overridden)";
257 printk(KERN_INFO "cleancache enabled, RAM provided by " 403 printk(KERN_INFO "cleancache enabled, RAM provided by "
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index a4ff225ee868..5c9dc43c1e94 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -98,6 +98,8 @@ static int __init balloon_init(void)
98 98
99 register_balloon(&balloon_sysdev); 99 register_balloon(&balloon_sysdev);
100 100
101 register_xen_selfballooning(&balloon_sysdev);
102
101 target_watch.callback = watch_target; 103 target_watch.callback = watch_target;
102 xenstore_notifier.notifier_call = balloon_init_watcher; 104 xenstore_notifier.notifier_call = balloon_init_watcher;
103 105
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
new file mode 100644
index 000000000000..010937b5a7c9
--- /dev/null
+++ b/drivers/xen/xen-selfballoon.c
@@ -0,0 +1,485 @@
1/******************************************************************************
2 * Xen selfballoon driver (and optional frontswap self-shrinking driver)
3 *
4 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5 *
6 * This code complements the cleancache and frontswap patchsets to optimize
7 * support for Xen Transcendent Memory ("tmem"). The policy it implements
8 * is rudimentary and will likely improve over time, but it does work well
9 * enough today.
10 *
11 * Two functionalities are implemented here which both use "control theory"
12 * (feedback) to optimize memory utilization. In a virtualized environment
13 * such as Xen, RAM is often a scarce resource and we would like to ensure
14 * that each of a possibly large number of virtual machines is using RAM
15 * efficiently, i.e. using as little as possible when under light load
16 * and obtaining as much as possible when memory demands are high.
17 * Since RAM needs vary highly dynamically and sometimes dramatically,
18 * "hysteresis" is used, that is, memory target is determined not just
19 * on current data but also on past data stored in the system.
20 *
21 * "Selfballooning" creates memory pressure by managing the Xen balloon
22 * driver to decrease and increase available kernel memory, driven
23 * largely by the target value of "Committed_AS" (see /proc/meminfo).
24 * Since Committed_AS does not account for clean mapped pages (i.e. pages
25 * in RAM that are identical to pages on disk), selfballooning has the
26 * effect of pushing less frequently used clean pagecache pages out of
27 * kernel RAM and, presumably using cleancache, into Xen tmem where
28 * Xen can more efficiently optimize RAM utilization for such pages.
29 *
30 * When kernel memory demand unexpectedly increases faster than Xen, via
31 * the selfballoon driver, is able to (or chooses to) provide usable RAM,
32 * the kernel may invoke swapping. In most cases, frontswap is able
33 * to absorb this swapping into Xen tmem. However, due to the fact
34 * that the kernel swap subsystem assumes swapping occurs to a disk,
35 * swapped pages may sit on the disk for a very long time; even if
36 * the kernel knows the page will never be used again. This is because
37 * the disk space costs very little and can be overwritten when
38 * necessary. When such stale pages are in frontswap, however, they
39 * are taking up valuable real estate. "Frontswap selfshrinking" works
40 * to resolve this: When frontswap activity is otherwise stable
41 * and the guest kernel is not under memory pressure, the "frontswap
42 * selfshrinking" accounts for this by providing pressure to remove some
43 * pages from frontswap and return them to kernel memory.
44 *
45 * For both "selfballooning" and "frontswap-selfshrinking", a worker
46 * thread is used and sysfs tunables are provided to adjust the frequency
47 * and rate of adjustments to achieve the goal, as well as to disable one
48 * or both functions independently.
49 *
50 * While some argue that this functionality can and should be implemented
51 * in userspace, it has been observed that bad things happen (e.g. OOMs).
52 *
53 * System configuration note: Selfballooning should not be enabled on
54 * systems without a sufficiently large swap device configured; for best
55 * results, it is recommended that total swap be increased by the size
56 * of the guest memory. Also, while technically not required to be
57 * configured, it is highly recommended that frontswap also be configured
58 * and enabled when selfballooning is running. So, selfballooning
59 * is disabled by default if frontswap is not configured and can only
60 * be enabled with the "selfballooning" kernel boot option; similarly
61 * selfballooning is enabled by default if frontswap is configured and
62 * can be disabled with the "noselfballooning" kernel boot option. Finally,
63 * when frontswap is configured, frontswap-selfshrinking can be disabled
64 * with the "noselfshrink" kernel boot option.
65 *
66 * Selfballooning is disallowed in domain0 and force-disabled.
67 *
68 */
69
70#include <linux/kernel.h>
71#include <linux/mm.h>
72#include <linux/mman.h>
73
74#include <xen/balloon.h>
75
76#include <xen/tmem.h>
77
78/* Enable/disable with sysfs. */
79static int xen_selfballooning_enabled __read_mostly;
80
81/*
82 * Controls rate at which memory target (this iteration) approaches
83 * ultimate goal when memory need is increasing (up-hysteresis) or
84 * decreasing (down-hysteresis). Higher values of hysteresis cause
85 * slower increases/decreases. The default values for the various
86 * parameters were deemed reasonable by experimentation, may be
87 * workload-dependent, and can all be adjusted via sysfs.
88 */
89static unsigned int selfballoon_downhysteresis __read_mostly = 8;
90static unsigned int selfballoon_uphysteresis __read_mostly = 1;
91
92/* In seconds (scaled by HZ when scheduled), controls frequency of worker invocation. */
93static unsigned int selfballoon_interval __read_mostly = 5;
94
95static void selfballoon_process(struct work_struct *work);
96static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
97
98#ifdef CONFIG_FRONTSWAP
99#include <linux/frontswap.h>
100
101/* Enable/disable with sysfs. */
102static bool frontswap_selfshrinking __read_mostly;
103
104/* Enable/disable with kernel boot option. */
105static bool use_frontswap_selfshrink __initdata = true;
106
107/*
108 * The default values for the following parameters were deemed reasonable
109 * by experimentation, may be workload-dependent, and can all be
110 * adjusted via sysfs.
111 */
112
113/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
114static unsigned int frontswap_hysteresis __read_mostly = 20;
115
116/*
117 * Number of selfballoon worker invocations to wait before observing that
118 * frontswap selfshrinking should commence. Note that selfshrinking does
119 * not use a separate worker thread.
120 */
121static unsigned int frontswap_inertia __read_mostly = 3;
122
123/* Countdown to next invocation of frontswap_shrink() */
124static unsigned long frontswap_inertia_counter;
125
126/*
127 * Invoked by the selfballoon worker thread, uses current number of pages
128 * in frontswap (frontswap_curr_pages()), previous status, and control
129 * values (hysteresis and inertia) to determine if frontswap should be
130 * shrunk and what the new frontswap size should be. Note that
131 * frontswap_shrink is essentially a partial swapoff that immediately
132 * transfers pages from the "swap device" (frontswap) back into kernel
133 * RAM; despite the name, frontswap "shrinking" is very different from
134 * the "shrinker" interface used by the kernel MM subsystem to reclaim
135 * memory.
136 */
137static void frontswap_selfshrink(void)
138{
139 static unsigned long cur_frontswap_pages;
140 static unsigned long last_frontswap_pages;
141 static unsigned long tgt_frontswap_pages;
142
143 last_frontswap_pages = cur_frontswap_pages;
144 cur_frontswap_pages = frontswap_curr_pages();
145 if (!cur_frontswap_pages ||
146 (cur_frontswap_pages > last_frontswap_pages)) {
147 frontswap_inertia_counter = frontswap_inertia;
148 return;
149 }
150 if (frontswap_inertia_counter && --frontswap_inertia_counter)
151 return;
152 if (cur_frontswap_pages <= frontswap_hysteresis)
153 tgt_frontswap_pages = 0;
154 else
155 tgt_frontswap_pages = cur_frontswap_pages -
156 (cur_frontswap_pages / frontswap_hysteresis);
157 frontswap_shrink(tgt_frontswap_pages);
158}
159
160static int __init xen_nofrontswap_selfshrink_setup(char *s)
161{
162 use_frontswap_selfshrink = false;
163 return 1;
164}
165
166__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);
167
168/* Disable with kernel boot option. */
169static bool use_selfballooning __initdata = true;
170
171static int __init xen_noselfballooning_setup(char *s)
172{
173 use_selfballooning = false;
174 return 1;
175}
176
177__setup("noselfballooning", xen_noselfballooning_setup);
178#else /* !CONFIG_FRONTSWAP */
179/* Enable with kernel boot option. */
180static bool use_selfballooning __initdata = false;
181
182static int __init xen_selfballooning_setup(char *s)
183{
184 use_selfballooning = true;
185 return 1;
186}
187
188__setup("selfballooning", xen_selfballooning_setup);
189#endif /* CONFIG_FRONTSWAP */
190
191/*
192 * Use current balloon size, the goal (vm_committed_as), and hysteresis
193 * parameters to set a new target balloon size
194 */
195static void selfballoon_process(struct work_struct *work)
196{
197 unsigned long cur_pages, goal_pages, tgt_pages;
198 bool reset_timer = false;
199
200 if (xen_selfballooning_enabled) {
201 cur_pages = balloon_stats.current_pages;
202 tgt_pages = cur_pages; /* default is no change */
203 goal_pages = percpu_counter_read_positive(&vm_committed_as) +
204 balloon_stats.current_pages - totalram_pages;
205#ifdef CONFIG_FRONTSWAP
206 /* allow space for frontswap pages to be repatriated */
207 if (frontswap_selfshrinking && frontswap_enabled)
208 goal_pages += frontswap_curr_pages();
209#endif
210 if (cur_pages > goal_pages)
211 tgt_pages = cur_pages -
212 ((cur_pages - goal_pages) /
213 selfballoon_downhysteresis);
214 else if (cur_pages < goal_pages)
215 tgt_pages = cur_pages +
216 ((goal_pages - cur_pages) /
217 selfballoon_uphysteresis);
218 /* else if cur_pages == goal_pages, no change */
219 balloon_set_new_target(tgt_pages);
220 reset_timer = true;
221 }
222#ifdef CONFIG_FRONTSWAP
223 if (frontswap_selfshrinking && frontswap_enabled) {
224 frontswap_selfshrink();
225 reset_timer = true;
226 }
227#endif
228 if (reset_timer)
229 schedule_delayed_work(&selfballoon_worker,
230 selfballoon_interval * HZ);
231}
232
233#ifdef CONFIG_SYSFS
234
235#include <linux/sysdev.h>
236#include <linux/capability.h>
237
238#define SELFBALLOON_SHOW(name, format, args...) \
239 static ssize_t show_##name(struct sys_device *dev, \
240 struct sysdev_attribute *attr, \
241 char *buf) \
242 { \
243 return sprintf(buf, format, ##args); \
244 }
245
246SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled);
247
248static ssize_t store_selfballooning(struct sys_device *dev,
249 struct sysdev_attribute *attr,
250 const char *buf,
251 size_t count)
252{
253 bool was_enabled = xen_selfballooning_enabled;
254 unsigned long tmp;
255 int err;
256
257 if (!capable(CAP_SYS_ADMIN))
258 return -EPERM;
259
260 err = strict_strtoul(buf, 10, &tmp);
261 if (err || ((tmp != 0) && (tmp != 1)))
262 return -EINVAL;
263
264 xen_selfballooning_enabled = !!tmp;
265 if (!was_enabled && xen_selfballooning_enabled)
266 schedule_delayed_work(&selfballoon_worker,
267 selfballoon_interval * HZ);
268
269 return count;
270}
271
272static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
273 show_selfballooning, store_selfballooning);
274
275SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval);
276
277static ssize_t store_selfballoon_interval(struct sys_device *dev,
278 struct sysdev_attribute *attr,
279 const char *buf,
280 size_t count)
281{
282 unsigned long val;
283 int err;
284
285 if (!capable(CAP_SYS_ADMIN))
286 return -EPERM;
287 err = strict_strtoul(buf, 10, &val);
288 if (err || val == 0)
289 return -EINVAL;
290 selfballoon_interval = val;
291 return count;
292}
293
294static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
295 show_selfballoon_interval, store_selfballoon_interval);
296
297SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis);
298
299static ssize_t store_selfballoon_downhys(struct sys_device *dev,
300 struct sysdev_attribute *attr,
301 const char *buf,
302 size_t count)
303{
304 unsigned long val;
305 int err;
306
307 if (!capable(CAP_SYS_ADMIN))
308 return -EPERM;
309 err = strict_strtoul(buf, 10, &val);
310 if (err || val == 0)
311 return -EINVAL;
312 selfballoon_downhysteresis = val;
313 return count;
314}
315
316static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
317 show_selfballoon_downhys, store_selfballoon_downhys);
318
319
320SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis);
321
322static ssize_t store_selfballoon_uphys(struct sys_device *dev,
323 struct sysdev_attribute *attr,
324 const char *buf,
325 size_t count)
326{
327 unsigned long val;
328 int err;
329
330 if (!capable(CAP_SYS_ADMIN))
331 return -EPERM;
332 err = strict_strtoul(buf, 10, &val);
333 if (err || val == 0)
334 return -EINVAL;
335 selfballoon_uphysteresis = val;
336 return count;
337}
338
339static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
340 show_selfballoon_uphys, store_selfballoon_uphys);
341
342#ifdef CONFIG_FRONTSWAP
343SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
344
345static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
346 struct sysdev_attribute *attr,
347 const char *buf,
348 size_t count)
349{
350 bool was_enabled = frontswap_selfshrinking;
351 unsigned long tmp;
352 int err;
353
354 if (!capable(CAP_SYS_ADMIN))
355 return -EPERM;
356 err = strict_strtoul(buf, 10, &tmp);
357 if (err || ((tmp != 0) && (tmp != 1)))
358 return -EINVAL;
359 frontswap_selfshrinking = !!tmp;
360 if (!was_enabled && !xen_selfballooning_enabled &&
361 frontswap_selfshrinking)
362 schedule_delayed_work(&selfballoon_worker,
363 selfballoon_interval * HZ);
364
365 return count;
366}
367
368static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
369 show_frontswap_selfshrinking, store_frontswap_selfshrinking);
370
371SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia);
372
373static ssize_t store_frontswap_inertia(struct sys_device *dev,
374 struct sysdev_attribute *attr,
375 const char *buf,
376 size_t count)
377{
378 unsigned long val;
379 int err;
380
381 if (!capable(CAP_SYS_ADMIN))
382 return -EPERM;
383 err = strict_strtoul(buf, 10, &val);
384 if (err || val == 0)
385 return -EINVAL;
386 frontswap_inertia = val;
387 frontswap_inertia_counter = val;
388 return count;
389}
390
391static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
392 show_frontswap_inertia, store_frontswap_inertia);
393
394SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis);
395
396static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
397 struct sysdev_attribute *attr,
398 const char *buf,
399 size_t count)
400{
401 unsigned long val;
402 int err;
403
404 if (!capable(CAP_SYS_ADMIN))
405 return -EPERM;
406 err = strict_strtoul(buf, 10, &val);
407 if (err || val == 0)
408 return -EINVAL;
409 frontswap_hysteresis = val;
410 return count;
411}
412
413static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
414 show_frontswap_hysteresis, store_frontswap_hysteresis);
415
416#endif /* CONFIG_FRONTSWAP */
417
418static struct attribute *selfballoon_attrs[] = {
419 &attr_selfballooning.attr,
420 &attr_selfballoon_interval.attr,
421 &attr_selfballoon_downhysteresis.attr,
422 &attr_selfballoon_uphysteresis.attr,
423#ifdef CONFIG_FRONTSWAP
424 &attr_frontswap_selfshrinking.attr,
425 &attr_frontswap_hysteresis.attr,
426 &attr_frontswap_inertia.attr,
427#endif
428 NULL
429};
430
431static struct attribute_group selfballoon_group = {
432 .name = "selfballoon",
433 .attrs = selfballoon_attrs
434};
435#endif
436
437int register_xen_selfballooning(struct sys_device *sysdev)
438{
439 int error = -1;
440
441#ifdef CONFIG_SYSFS
442 error = sysfs_create_group(&sysdev->kobj, &selfballoon_group);
443#endif
444 return error;
445}
446EXPORT_SYMBOL(register_xen_selfballooning);
447
448static int __init xen_selfballoon_init(void)
449{
450 bool enable = false;
451
452 if (!xen_domain())
453 return -ENODEV;
454
455 if (xen_initial_domain()) {
456 pr_info("xen/balloon: Xen selfballooning driver "
457 "disabled for domain0.\n");
458 return -ENODEV;
459 }
460
461 xen_selfballooning_enabled = tmem_enabled && use_selfballooning;
462 if (xen_selfballooning_enabled) {
463 pr_info("xen/balloon: Initializing Xen "
464 "selfballooning driver.\n");
465 enable = true;
466 }
467#ifdef CONFIG_FRONTSWAP
468 frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink;
469 if (frontswap_selfshrinking) {
470 pr_info("xen/balloon: Initializing frontswap "
471 "selfshrinking driver.\n");
472 enable = true;
473 }
474#endif
475 if (!enable)
476 return -ENODEV;
477
478 schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
479
480 return 0;
481}
482
483subsys_initcall(xen_selfballoon_init);
484
485MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 739769551e33..bd2f90c9ac8b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev)
378 kfree(to_xenbus_device(dev)); 378 kfree(to_xenbus_device(dev));
379} 379}
380 380
381static ssize_t xendev_show_nodename(struct device *dev, 381static ssize_t nodename_show(struct device *dev,
382 struct device_attribute *attr, char *buf) 382 struct device_attribute *attr, char *buf)
383{ 383{
384 return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); 384 return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
385} 385}
386static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
387 386
388static ssize_t xendev_show_devtype(struct device *dev, 387static ssize_t devtype_show(struct device *dev,
389 struct device_attribute *attr, char *buf) 388 struct device_attribute *attr, char *buf)
390{ 389{
391 return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); 390 return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
392} 391}
393static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
394 392
395static ssize_t xendev_show_modalias(struct device *dev, 393static ssize_t modalias_show(struct device *dev,
396 struct device_attribute *attr, char *buf) 394 struct device_attribute *attr, char *buf)
397{ 395{
398 return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); 396 return sprintf(buf, "%s:%s\n", dev->bus->name,
397 to_xenbus_device(dev)->devicetype);
399} 398}
400static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); 399
400struct device_attribute xenbus_dev_attrs[] = {
401 __ATTR_RO(nodename),
402 __ATTR_RO(devtype),
403 __ATTR_RO(modalias),
404 __ATTR_NULL
405};
406EXPORT_SYMBOL_GPL(xenbus_dev_attrs);
401 407
402int xenbus_probe_node(struct xen_bus_type *bus, 408int xenbus_probe_node(struct xen_bus_type *bus,
403 const char *type, 409 const char *type,
@@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
449 if (err) 455 if (err)
450 goto fail; 456 goto fail;
451 457
452 err = device_create_file(&xendev->dev, &dev_attr_nodename);
453 if (err)
454 goto fail_unregister;
455
456 err = device_create_file(&xendev->dev, &dev_attr_devtype);
457 if (err)
458 goto fail_remove_nodename;
459
460 err = device_create_file(&xendev->dev, &dev_attr_modalias);
461 if (err)
462 goto fail_remove_devtype;
463
464 return 0; 458 return 0;
465fail_remove_devtype:
466 device_remove_file(&xendev->dev, &dev_attr_devtype);
467fail_remove_nodename:
468 device_remove_file(&xendev->dev, &dev_attr_nodename);
469fail_unregister:
470 device_unregister(&xendev->dev);
471fail: 459fail:
472 kfree(xendev); 460 kfree(xendev);
473 return err; 461 return err;
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index 888b9900ca08..b814935378c7 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -48,6 +48,8 @@ struct xen_bus_type
48 struct bus_type bus; 48 struct bus_type bus;
49}; 49};
50 50
51extern struct device_attribute xenbus_dev_attrs[];
52
51extern int xenbus_match(struct device *_dev, struct device_driver *_drv); 53extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
52extern int xenbus_dev_probe(struct device *_dev); 54extern int xenbus_dev_probe(struct device *_dev);
53extern int xenbus_dev_remove(struct device *_dev); 55extern int xenbus_dev_remove(struct device *_dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 6cf467bf63ec..60adf919d78d 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev,
107 if (xdev == NULL) 107 if (xdev == NULL)
108 return -ENODEV; 108 return -ENODEV;
109 109
110 if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
111 return -ENOMEM;
112
110 /* stuff we want to pass to /sbin/hotplug */ 113 /* stuff we want to pass to /sbin/hotplug */
111 if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) 114 if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
112 return -ENOMEM; 115 return -ENOMEM;
@@ -183,10 +186,6 @@ static void frontend_changed(struct xenbus_watch *watch,
183 xenbus_otherend_changed(watch, vec, len, 0); 186 xenbus_otherend_changed(watch, vec, len, 0);
184} 187}
185 188
186static struct device_attribute xenbus_backend_dev_attrs[] = {
187 __ATTR_NULL
188};
189
190static struct xen_bus_type xenbus_backend = { 189static struct xen_bus_type xenbus_backend = {
191 .root = "backend", 190 .root = "backend",
192 .levels = 3, /* backend/type/<frontend>/<id> */ 191 .levels = 3, /* backend/type/<frontend>/<id> */
@@ -200,7 +199,7 @@ static struct xen_bus_type xenbus_backend = {
200 .probe = xenbus_dev_probe, 199 .probe = xenbus_dev_probe,
201 .remove = xenbus_dev_remove, 200 .remove = xenbus_dev_remove,
202 .shutdown = xenbus_dev_shutdown, 201 .shutdown = xenbus_dev_shutdown,
203 .dev_attrs = xenbus_backend_dev_attrs, 202 .dev_attrs = xenbus_dev_attrs,
204 }, 203 },
205}; 204};
206 205
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index b6a2690c9d49..ed2ba474a560 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch,
81 xenbus_otherend_changed(watch, vec, len, 1); 81 xenbus_otherend_changed(watch, vec, len, 1);
82} 82}
83 83
84static struct device_attribute xenbus_frontend_dev_attrs[] = {
85 __ATTR_NULL
86};
87
88static const struct dev_pm_ops xenbus_pm_ops = { 84static const struct dev_pm_ops xenbus_pm_ops = {
89 .suspend = xenbus_dev_suspend, 85 .suspend = xenbus_dev_suspend,
90 .resume = xenbus_dev_resume, 86 .resume = xenbus_dev_resume,
@@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = {
106 .probe = xenbus_dev_probe, 102 .probe = xenbus_dev_probe,
107 .remove = xenbus_dev_remove, 103 .remove = xenbus_dev_remove,
108 .shutdown = xenbus_dev_shutdown, 104 .shutdown = xenbus_dev_shutdown,
109 .dev_attrs = xenbus_frontend_dev_attrs, 105 .dev_attrs = xenbus_dev_attrs,
110 106
111 .pm = &xenbus_pm_ops, 107 .pm = &xenbus_pm_ops,
112 }, 108 },