diff options
author | Dan Magenheimer <dan.magenheimer@oracle.com> | 2011-05-26 12:02:21 -0400 |
---|---|---|
committer | Dan Magenheimer <dan.magenheimer@oracle.com> | 2011-05-26 12:02:21 -0400 |
commit | 5bc20fc59706214d9591c11e1938a629d3538c12 (patch) | |
tree | ebfb66428ce888560dd982d3ca313f039a53ae40 /drivers/xen | |
parent | 1cfd8bd0f97ae3ad314151cd0fd70454d7b39699 (diff) |
xen: cleancache shim to Xen Transcendent Memory
This patch provides a shim between the kernel-internal cleancache
API (see Documentation/mm/cleancache.txt) and the Xen Transcendent
Memory ABI (see http://oss.oracle.com/projects/tmem).
Xen tmem provides "hypervisor RAM" as an ephemeral page-oriented
pseudo-RAM store for cleancache pages, shared cleancache pages,
and frontswap pages. Tmem provides enterprise-quality concurrency,
full save/restore and live migration support, compression
and deduplication.
A presentation showing up to 8% faster performance and up to 52%
reduction in sectors read on a kernel compile workload, despite
aggressive in-kernel page reclamation ("self-ballooning") can be
found at:
http://oss.oracle.com/projects/tmem/dist/documentation/presentations/TranscendentMemoryXenSummit2010.pdf
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik Van Riel <riel@redhat.com>
Cc: Jan Beulich <JBeulich@novell.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Andreas Dilger <adilger@sun.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <joel.becker@oracle.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Makefile | 1 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 264 |
2 files changed, 265 insertions, 0 deletions
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index f420f1ff7f13..7aa6804173ab 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,5 +1,6 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o balloon.o | 1 | obj-y += grant-table.o features.o events.o manage.o balloon.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-y += tmem.o | ||
3 | 4 | ||
4 | nostackp := $(call cc-option, -fno-stack-protector) | 5 | nostackp := $(call cc-option, -fno-stack-protector) |
5 | CFLAGS_features.o := $(nostackp) | 6 | CFLAGS_features.o := $(nostackp) |
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c new file mode 100644 index 000000000000..816a44959ef0 --- /dev/null +++ b/drivers/xen/tmem.c | |||
@@ -0,0 +1,264 @@ | |||
1 | /* | ||
2 | * Xen implementation for transcendent memory (tmem) | ||
3 | * | ||
4 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
5 | * Author: Dan Magenheimer | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/cleancache.h> | ||
13 | |||
14 | #include <xen/xen.h> | ||
15 | #include <xen/interface/xen.h> | ||
16 | #include <asm/xen/hypercall.h> | ||
17 | #include <asm/xen/page.h> | ||
18 | #include <asm/xen/hypervisor.h> | ||
19 | |||
20 | #define TMEM_CONTROL 0 | ||
21 | #define TMEM_NEW_POOL 1 | ||
22 | #define TMEM_DESTROY_POOL 2 | ||
23 | #define TMEM_NEW_PAGE 3 | ||
24 | #define TMEM_PUT_PAGE 4 | ||
25 | #define TMEM_GET_PAGE 5 | ||
26 | #define TMEM_FLUSH_PAGE 6 | ||
27 | #define TMEM_FLUSH_OBJECT 7 | ||
28 | #define TMEM_READ 8 | ||
29 | #define TMEM_WRITE 9 | ||
30 | #define TMEM_XCHG 10 | ||
31 | |||
32 | /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ | ||
33 | #define TMEM_POOL_PERSIST 1 | ||
34 | #define TMEM_POOL_SHARED 2 | ||
35 | #define TMEM_POOL_PAGESIZE_SHIFT 4 | ||
36 | #define TMEM_VERSION_SHIFT 24 | ||
37 | |||
38 | |||
39 | struct tmem_pool_uuid { | ||
40 | u64 uuid_lo; | ||
41 | u64 uuid_hi; | ||
42 | }; | ||
43 | |||
44 | struct tmem_oid { | ||
45 | u64 oid[3]; | ||
46 | }; | ||
47 | |||
48 | #define TMEM_POOL_PRIVATE_UUID { 0, 0 } | ||
49 | |||
50 | /* flags for tmem_ops.new_pool */ | ||
51 | #define TMEM_POOL_PERSIST 1 | ||
52 | #define TMEM_POOL_SHARED 2 | ||
53 | |||
54 | /* xen tmem foundation ops/hypercalls */ | ||
55 | |||
56 | static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid, | ||
57 | u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len) | ||
58 | { | ||
59 | struct tmem_op op; | ||
60 | int rc = 0; | ||
61 | |||
62 | op.cmd = tmem_cmd; | ||
63 | op.pool_id = tmem_pool; | ||
64 | op.u.gen.oid[0] = oid.oid[0]; | ||
65 | op.u.gen.oid[1] = oid.oid[1]; | ||
66 | op.u.gen.oid[2] = oid.oid[2]; | ||
67 | op.u.gen.index = index; | ||
68 | op.u.gen.tmem_offset = tmem_offset; | ||
69 | op.u.gen.pfn_offset = pfn_offset; | ||
70 | op.u.gen.len = len; | ||
71 | set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn); | ||
72 | rc = HYPERVISOR_tmem_op(&op); | ||
73 | return rc; | ||
74 | } | ||
75 | |||
76 | static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, | ||
77 | u32 flags, unsigned long pagesize) | ||
78 | { | ||
79 | struct tmem_op op; | ||
80 | int rc = 0, pageshift; | ||
81 | |||
82 | for (pageshift = 0; pagesize != 1; pageshift++) | ||
83 | pagesize >>= 1; | ||
84 | flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT; | ||
85 | flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT; | ||
86 | op.cmd = TMEM_NEW_POOL; | ||
87 | op.u.new.uuid[0] = uuid.uuid_lo; | ||
88 | op.u.new.uuid[1] = uuid.uuid_hi; | ||
89 | op.u.new.flags = flags; | ||
90 | rc = HYPERVISOR_tmem_op(&op); | ||
91 | return rc; | ||
92 | } | ||
93 | |||
94 | /* xen generic tmem ops */ | ||
95 | |||
96 | static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, | ||
97 | u32 index, unsigned long pfn) | ||
98 | { | ||
99 | unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; | ||
100 | |||
101 | return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, | ||
102 | gmfn, 0, 0, 0); | ||
103 | } | ||
104 | |||
105 | static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, | ||
106 | u32 index, unsigned long pfn) | ||
107 | { | ||
108 | unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; | ||
109 | |||
110 | return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, | ||
111 | gmfn, 0, 0, 0); | ||
112 | } | ||
113 | |||
114 | static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) | ||
115 | { | ||
116 | return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, | ||
117 | 0, 0, 0, 0); | ||
118 | } | ||
119 | |||
120 | static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) | ||
121 | { | ||
122 | return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); | ||
123 | } | ||
124 | |||
125 | static int xen_tmem_destroy_pool(u32 pool_id) | ||
126 | { | ||
127 | struct tmem_oid oid = { { 0 } }; | ||
128 | |||
129 | return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); | ||
130 | } | ||
131 | |||
132 | int tmem_enabled; | ||
133 | |||
134 | static int __init enable_tmem(char *s) | ||
135 | { | ||
136 | tmem_enabled = 1; | ||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | __setup("tmem", enable_tmem); | ||
141 | |||
142 | /* cleancache ops */ | ||
143 | |||
144 | static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, | ||
145 | pgoff_t index, struct page *page) | ||
146 | { | ||
147 | u32 ind = (u32) index; | ||
148 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
149 | unsigned long pfn = page_to_pfn(page); | ||
150 | |||
151 | if (pool < 0) | ||
152 | return; | ||
153 | if (ind != index) | ||
154 | return; | ||
155 | mb(); /* ensure page is quiescent; tmem may address it with an alias */ | ||
156 | (void)xen_tmem_put_page((u32)pool, oid, ind, pfn); | ||
157 | } | ||
158 | |||
159 | static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, | ||
160 | pgoff_t index, struct page *page) | ||
161 | { | ||
162 | u32 ind = (u32) index; | ||
163 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
164 | unsigned long pfn = page_to_pfn(page); | ||
165 | int ret; | ||
166 | |||
167 | /* translate return values to linux semantics */ | ||
168 | if (pool < 0) | ||
169 | return -1; | ||
170 | if (ind != index) | ||
171 | return -1; | ||
172 | ret = xen_tmem_get_page((u32)pool, oid, ind, pfn); | ||
173 | if (ret == 1) | ||
174 | return 0; | ||
175 | else | ||
176 | return -1; | ||
177 | } | ||
178 | |||
179 | static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key, | ||
180 | pgoff_t index) | ||
181 | { | ||
182 | u32 ind = (u32) index; | ||
183 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
184 | |||
185 | if (pool < 0) | ||
186 | return; | ||
187 | if (ind != index) | ||
188 | return; | ||
189 | (void)xen_tmem_flush_page((u32)pool, oid, ind); | ||
190 | } | ||
191 | |||
192 | static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key) | ||
193 | { | ||
194 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
195 | |||
196 | if (pool < 0) | ||
197 | return; | ||
198 | (void)xen_tmem_flush_object((u32)pool, oid); | ||
199 | } | ||
200 | |||
201 | static void tmem_cleancache_flush_fs(int pool) | ||
202 | { | ||
203 | if (pool < 0) | ||
204 | return; | ||
205 | (void)xen_tmem_destroy_pool((u32)pool); | ||
206 | } | ||
207 | |||
208 | static int tmem_cleancache_init_fs(size_t pagesize) | ||
209 | { | ||
210 | struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID; | ||
211 | |||
212 | return xen_tmem_new_pool(uuid_private, 0, pagesize); | ||
213 | } | ||
214 | |||
215 | static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) | ||
216 | { | ||
217 | struct tmem_pool_uuid shared_uuid; | ||
218 | |||
219 | shared_uuid.uuid_lo = *(u64 *)uuid; | ||
220 | shared_uuid.uuid_hi = *(u64 *)(&uuid[8]); | ||
221 | return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); | ||
222 | } | ||
223 | |||
224 | static int use_cleancache = 1; | ||
225 | |||
226 | static int __init no_cleancache(char *s) | ||
227 | { | ||
228 | use_cleancache = 0; | ||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | __setup("nocleancache", no_cleancache); | ||
233 | |||
234 | static struct cleancache_ops tmem_cleancache_ops = { | ||
235 | .put_page = tmem_cleancache_put_page, | ||
236 | .get_page = tmem_cleancache_get_page, | ||
237 | .flush_page = tmem_cleancache_flush_page, | ||
238 | .flush_inode = tmem_cleancache_flush_inode, | ||
239 | .flush_fs = tmem_cleancache_flush_fs, | ||
240 | .init_shared_fs = tmem_cleancache_init_shared_fs, | ||
241 | .init_fs = tmem_cleancache_init_fs | ||
242 | }; | ||
243 | |||
244 | static int __init xen_tmem_init(void) | ||
245 | { | ||
246 | struct cleancache_ops old_ops; | ||
247 | |||
248 | if (!xen_domain()) | ||
249 | return 0; | ||
250 | #ifdef CONFIG_CLEANCACHE | ||
251 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | ||
252 | if (tmem_enabled && use_cleancache) { | ||
253 | char *s = ""; | ||
254 | old_ops = cleancache_register_ops(&tmem_cleancache_ops); | ||
255 | if (old_ops.init_fs != NULL) | ||
256 | s = " (WARNING: cleancache_ops overridden)"; | ||
257 | printk(KERN_INFO "cleancache enabled, RAM provided by " | ||
258 | "Xen Transcendent Memory%s\n", s); | ||
259 | } | ||
260 | #endif | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | module_init(xen_tmem_init) | ||