aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Magenheimer <dan.magenheimer@oracle.com>2011-05-26 12:02:21 -0400
committerDan Magenheimer <dan.magenheimer@oracle.com>2011-05-26 12:02:21 -0400
commit5bc20fc59706214d9591c11e1938a629d3538c12 (patch)
treeebfb66428ce888560dd982d3ca313f039a53ae40
parent1cfd8bd0f97ae3ad314151cd0fd70454d7b39699 (diff)
xen: cleancache shim to Xen Transcendent Memory
This patch provides a shim between the kernel-internal cleancache API (see Documentation/mm/cleancache.txt) and the Xen Transcendent Memory ABI (see http://oss.oracle.com/projects/tmem). Xen tmem provides "hypervisor RAM" as an ephemeral page-oriented pseudo-RAM store for cleancache pages, shared cleancache pages, and frontswap pages. Tmem provides enterprise-quality concurrency, full save/restore and live migration support, compression and deduplication. A presentation showing up to 8% faster performance and up to 52% reduction in sectors read on a kernel compile workload, despite aggressive in-kernel page reclamation ("self-ballooning") can be found at: http://oss.oracle.com/projects/tmem/dist/documentation/presentations/TranscendentMemoryXenSummit2010.pdf Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> Reviewed-by: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Matthew Wilcox <matthew@wil.cx> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik Van Riel <riel@redhat.com> Cc: Jan Beulich <JBeulich@novell.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Andreas Dilger <adilger@sun.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <joel.becker@oracle.com> Cc: Nitin Gupta <ngupta@vflare.org>
-rw-r--r--arch/x86/include/asm/xen/hypercall.h7
-rw-r--r--drivers/xen/Makefile1
-rw-r--r--drivers/xen/tmem.c264
-rw-r--r--include/xen/interface/xen.h22
4 files changed, 294 insertions, 0 deletions
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 8508bfe52296..d240ea950519 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -447,6 +447,13 @@ HYPERVISOR_hvm_op(int op, void *arg)
447 return _hypercall2(unsigned long, hvm_op, op, arg); 447 return _hypercall2(unsigned long, hvm_op, op, arg);
448} 448}
449 449
450static inline int
451HYPERVISOR_tmem_op(
452 struct tmem_op *op)
453{
454 return _hypercall1(int, tmem_op, op);
455}
456
450static inline void 457static inline void
451MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) 458MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
452{ 459{
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index f420f1ff7f13..7aa6804173ab 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,5 +1,6 @@
1obj-y += grant-table.o features.o events.o manage.o balloon.o 1obj-y += grant-table.o features.o events.o manage.o balloon.o
2obj-y += xenbus/ 2obj-y += xenbus/
3obj-y += tmem.o
3 4
4nostackp := $(call cc-option, -fno-stack-protector) 5nostackp := $(call cc-option, -fno-stack-protector)
5CFLAGS_features.o := $(nostackp) 6CFLAGS_features.o := $(nostackp)
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
new file mode 100644
index 000000000000..816a44959ef0
--- /dev/null
+++ b/drivers/xen/tmem.c
@@ -0,0 +1,264 @@
1/*
2 * Xen implementation for transcendent memory (tmem)
3 *
4 * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
5 * Author: Dan Magenheimer
6 */
7
8#include <linux/kernel.h>
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/pagemap.h>
12#include <linux/cleancache.h>
13
14#include <xen/xen.h>
15#include <xen/interface/xen.h>
16#include <asm/xen/hypercall.h>
17#include <asm/xen/page.h>
18#include <asm/xen/hypervisor.h>
19
20#define TMEM_CONTROL 0
21#define TMEM_NEW_POOL 1
22#define TMEM_DESTROY_POOL 2
23#define TMEM_NEW_PAGE 3
24#define TMEM_PUT_PAGE 4
25#define TMEM_GET_PAGE 5
26#define TMEM_FLUSH_PAGE 6
27#define TMEM_FLUSH_OBJECT 7
28#define TMEM_READ 8
29#define TMEM_WRITE 9
30#define TMEM_XCHG 10
31
32/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
33#define TMEM_POOL_PERSIST 1
34#define TMEM_POOL_SHARED 2
35#define TMEM_POOL_PAGESIZE_SHIFT 4
36#define TMEM_VERSION_SHIFT 24
37
38
39struct tmem_pool_uuid { /* pool UUID carried as two 64-bit halves */
40 u64 uuid_lo; /* copied to tmem_op u.new.uuid[0] by xen_tmem_new_pool() */
41 u64 uuid_hi; /* copied to tmem_op u.new.uuid[1] by xen_tmem_new_pool() */
42};
43
44struct tmem_oid { /* object id; the cleancache ops build it by reinterpreting a cleancache_filekey */
45 u64 oid[3]; /* copied word by word into tmem_op u.gen.oid[] by xen_tmem_op() */
46};
47
48#define TMEM_POOL_PRIVATE_UUID { 0, 0 }
49
50/* flags for tmem_ops.new_pool */
51#define TMEM_POOL_PERSIST 1
52#define TMEM_POOL_SHARED 2
53
54/* xen tmem foundation ops/hypercalls */
55
56static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid,
57 u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len)
58{
59 struct tmem_op op;
60 int rc = 0;
61
62 op.cmd = tmem_cmd;
63 op.pool_id = tmem_pool;
64 op.u.gen.oid[0] = oid.oid[0];
65 op.u.gen.oid[1] = oid.oid[1];
66 op.u.gen.oid[2] = oid.oid[2];
67 op.u.gen.index = index;
68 op.u.gen.tmem_offset = tmem_offset;
69 op.u.gen.pfn_offset = pfn_offset;
70 op.u.gen.len = len;
71 set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn);
72 rc = HYPERVISOR_tmem_op(&op);
73 return rc;
74}
75
76static int xen_tmem_new_pool(struct tmem_pool_uuid uuid,
77 u32 flags, unsigned long pagesize)
78{
79 struct tmem_op op;
80 int rc = 0, pageshift;
81
82 for (pageshift = 0; pagesize != 1; pageshift++)
83 pagesize >>= 1;
84 flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT;
85 flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT;
86 op.cmd = TMEM_NEW_POOL;
87 op.u.new.uuid[0] = uuid.uuid_lo;
88 op.u.new.uuid[1] = uuid.uuid_hi;
89 op.u.new.flags = flags;
90 rc = HYPERVISOR_tmem_op(&op);
91 return rc;
92}
93
94/* xen generic tmem ops */
95
96static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid,
97 u32 index, unsigned long pfn)
98{
99 unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn;
100
101 return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index,
102 gmfn, 0, 0, 0);
103}
104
105static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid,
106 u32 index, unsigned long pfn)
107{
108 unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn;
109
110 return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index,
111 gmfn, 0, 0, 0);
112}
113
114static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index)
115{
116 return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index,
117 0, 0, 0, 0);
118}
119
120static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
121{
122 return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
123}
124
125static int xen_tmem_destroy_pool(u32 pool_id)
126{
127 struct tmem_oid oid = { { 0 } };
128
129 return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
130}
131
132int tmem_enabled;
133
134static int __init enable_tmem(char *s)
135{
136 tmem_enabled = 1;
137 return 1;
138}
139
140__setup("tmem", enable_tmem);
141
142/* cleancache ops */
143
144static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
145 pgoff_t index, struct page *page)
146{
147 u32 ind = (u32) index;
148 struct tmem_oid oid = *(struct tmem_oid *)&key;
149 unsigned long pfn = page_to_pfn(page);
150
151 if (pool < 0)
152 return;
153 if (ind != index)
154 return;
155 mb(); /* ensure page is quiescent; tmem may address it with an alias */
156 (void)xen_tmem_put_page((u32)pool, oid, ind, pfn);
157}
158
159static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key,
160 pgoff_t index, struct page *page)
161{
162 u32 ind = (u32) index;
163 struct tmem_oid oid = *(struct tmem_oid *)&key;
164 unsigned long pfn = page_to_pfn(page);
165 int ret;
166
167 /* translate return values to linux semantics */
168 if (pool < 0)
169 return -1;
170 if (ind != index)
171 return -1;
172 ret = xen_tmem_get_page((u32)pool, oid, ind, pfn);
173 if (ret == 1)
174 return 0;
175 else
176 return -1;
177}
178
179static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key,
180 pgoff_t index)
181{
182 u32 ind = (u32) index;
183 struct tmem_oid oid = *(struct tmem_oid *)&key;
184
185 if (pool < 0)
186 return;
187 if (ind != index)
188 return;
189 (void)xen_tmem_flush_page((u32)pool, oid, ind);
190}
191
192static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key)
193{
194 struct tmem_oid oid = *(struct tmem_oid *)&key;
195
196 if (pool < 0)
197 return;
198 (void)xen_tmem_flush_object((u32)pool, oid);
199}
200
201static void tmem_cleancache_flush_fs(int pool)
202{
203 if (pool < 0)
204 return;
205 (void)xen_tmem_destroy_pool((u32)pool);
206}
207
208static int tmem_cleancache_init_fs(size_t pagesize)
209{
210 struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID;
211
212 return xen_tmem_new_pool(uuid_private, 0, pagesize);
213}
214
215static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
216{
217 struct tmem_pool_uuid shared_uuid;
218
219 shared_uuid.uuid_lo = *(u64 *)uuid;
220 shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
221 return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
222}
223
224static int use_cleancache = 1;
225
226static int __init no_cleancache(char *s)
227{
228 use_cleancache = 0;
229 return 1;
230}
231
232__setup("nocleancache", no_cleancache);
233
234static struct cleancache_ops tmem_cleancache_ops = {
235 .put_page = tmem_cleancache_put_page,
236 .get_page = tmem_cleancache_get_page,
237 .flush_page = tmem_cleancache_flush_page,
238 .flush_inode = tmem_cleancache_flush_inode,
239 .flush_fs = tmem_cleancache_flush_fs,
240 .init_shared_fs = tmem_cleancache_init_shared_fs,
241 .init_fs = tmem_cleancache_init_fs
242};
243
244static int __init xen_tmem_init(void)
245{
246 struct cleancache_ops old_ops;
247
248 if (!xen_domain())
249 return 0;
250#ifdef CONFIG_CLEANCACHE
251 BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
252 if (tmem_enabled && use_cleancache) {
253 char *s = "";
254 old_ops = cleancache_register_ops(&tmem_cleancache_ops);
255 if (old_ops.init_fs != NULL)
256 s = " (WARNING: cleancache_ops overridden)";
257 printk(KERN_INFO "cleancache enabled, RAM provided by "
258 "Xen Transcendent Memory%s\n", s);
259 }
260#endif
261 return 0;
262}
263
264module_init(xen_tmem_init)
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index b33257bc7e83..70213b4515eb 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -58,6 +58,7 @@
58#define __HYPERVISOR_event_channel_op 32 58#define __HYPERVISOR_event_channel_op 32
59#define __HYPERVISOR_physdev_op 33 59#define __HYPERVISOR_physdev_op 33
60#define __HYPERVISOR_hvm_op 34 60#define __HYPERVISOR_hvm_op 34
61#define __HYPERVISOR_tmem_op 38
61 62
62/* Architecture-specific hypercall definitions. */ 63/* Architecture-specific hypercall definitions. */
63#define __HYPERVISOR_arch_0 48 64#define __HYPERVISOR_arch_0 48
@@ -461,6 +462,27 @@ typedef uint8_t xen_domain_handle_t[16];
461#define __mk_unsigned_long(x) x ## UL 462#define __mk_unsigned_long(x) x ## UL
462#define mk_unsigned_long(x) __mk_unsigned_long(x) 463#define mk_unsigned_long(x) __mk_unsigned_long(x)
463 464
465#define TMEM_SPEC_VERSION 1
466
467struct tmem_op { /* request block whose address is passed to HYPERVISOR_tmem_op() */
468 uint32_t cmd; /* command code, e.g. TMEM_NEW_POOL */
469 int32_t pool_id; /* pool the command applies to; set by xen_tmem_op() */
470 union {
471 struct { /* for cmd == TMEM_NEW_POOL */
472 uint64_t uuid[2]; /* [0] = low half, [1] = high half of the pool UUID */
473 uint32_t flags; /* TMEM_POOL_* bits plus encoded page size and spec version */
474 } new;
475 struct { /* layout filled in by xen_tmem_op() for the other commands */
476 uint64_t oid[3]; /* object id */
477 uint32_t index; /* page index within the object */
478 uint32_t tmem_offset;
479 uint32_t pfn_offset;
480 uint32_t len;
481 GUEST_HANDLE(void) gmfn; /* guest machine page frame */
482 } gen;
483 } u;
484};
485
464#else /* __ASSEMBLY__ */ 486#else /* __ASSEMBLY__ */
465 487
466/* In assembly code we cannot use C numeric constant suffixes. */ 488/* In assembly code we cannot use C numeric constant suffixes. */