diff options
author | Liu, Jinsong <jinsong.liu@intel.com> | 2012-06-11 08:38:08 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2012-07-19 15:51:39 -0400 |
commit | f65c9bb3fb725551d3e405f4d092caf24929cebe (patch) | |
tree | d68e9715ac73dd431709ddf2301a313105688b68 /drivers/xen | |
parent | 05e36006adc3046f86f2be8652a22d5f77ebd6ea (diff) |
xen/pcpu: Xen physical cpus online/offline sys interface
This patch provides a sysfs interface for onlining/offlining Xen physical cpus.
Users can use it for their own purposes, such as power saving:
offlining some cpus under a light workload saves power greatly.
The basic workflow is: the user onlines/offlines a cpu via the sysfs interface,
a hypercall asks Xen to carry it out, and once done Xen injects a virq back into dom0,
and then dom0 syncs the cpu status.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Makefile | 1 | ||||
-rw-r--r-- | drivers/xen/pcpu.c | 371 |
2 files changed, 372 insertions, 0 deletions
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index a7870292bc75..d80bea5535a2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -17,6 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | |||
17 | obj-$(CONFIG_XEN_PVHVM) += platform-pci.o | 17 | obj-$(CONFIG_XEN_PVHVM) += platform-pci.o |
18 | obj-$(CONFIG_XEN_TMEM) += tmem.o | 18 | obj-$(CONFIG_XEN_TMEM) += tmem.o |
19 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o | 19 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o |
20 | obj-$(CONFIG_XEN_DOM0) += pcpu.o | ||
20 | obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o | 21 | obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o |
21 | obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o | 22 | obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o |
22 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ | 23 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ |
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c new file mode 100644 index 000000000000..067fcfa1723e --- /dev/null +++ b/drivers/xen/pcpu.c | |||
@@ -0,0 +1,371 @@ | |||
1 | /****************************************************************************** | ||
2 | * pcpu.c | ||
3 | * Manage physical cpus in dom0: get pcpu info and provide a sysfs interface | ||
4 | * | ||
5 | * Copyright (c) 2012 Intel Corporation | ||
6 | * Author: Liu, Jinsong <jinsong.liu@intel.com> | ||
7 | * Author: Jiang, Yunhong <yunhong.jiang@intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/interrupt.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/cpu.h> | ||
37 | #include <linux/stat.h> | ||
38 | #include <linux/capability.h> | ||
39 | |||
40 | #include <xen/xen.h> | ||
41 | #include <xen/xenbus.h> | ||
42 | #include <xen/events.h> | ||
43 | #include <xen/interface/platform.h> | ||
44 | #include <asm/xen/hypervisor.h> | ||
45 | #include <asm/xen/hypercall.h> | ||
46 | |||
47 | #define XEN_PCPU "xen_cpu: " | ||
48 | |||
49 | /* | ||
50 | * @cpu_id: Xen physical cpu logic number | ||
51 | * @flags: Xen physical cpu status flag | ||
52 | * - XEN_PCPU_FLAGS_ONLINE: cpu is online | ||
53 | * - XEN_PCPU_FLAGS_INVALID: cpu is not present | ||
54 | */ | ||
55 | struct pcpu { | ||
56 | struct list_head list; | ||
57 | struct device dev; | ||
58 | uint32_t cpu_id; | ||
59 | uint32_t flags; | ||
60 | }; | ||
61 | |||
62 | static struct bus_type xen_pcpu_subsys = { | ||
63 | .name = "xen_cpu", | ||
64 | .dev_name = "xen_cpu", | ||
65 | }; | ||
66 | |||
67 | static DEFINE_MUTEX(xen_pcpu_lock); | ||
68 | |||
69 | static LIST_HEAD(xen_pcpus); | ||
70 | |||
71 | static int xen_pcpu_down(uint32_t cpu_id) | ||
72 | { | ||
73 | struct xen_platform_op op = { | ||
74 | .cmd = XENPF_cpu_offline, | ||
75 | .interface_version = XENPF_INTERFACE_VERSION, | ||
76 | .u.cpu_ol.cpuid = cpu_id, | ||
77 | }; | ||
78 | |||
79 | return HYPERVISOR_dom0_op(&op); | ||
80 | } | ||
81 | |||
82 | static int xen_pcpu_up(uint32_t cpu_id) | ||
83 | { | ||
84 | struct xen_platform_op op = { | ||
85 | .cmd = XENPF_cpu_online, | ||
86 | .interface_version = XENPF_INTERFACE_VERSION, | ||
87 | .u.cpu_ol.cpuid = cpu_id, | ||
88 | }; | ||
89 | |||
90 | return HYPERVISOR_dom0_op(&op); | ||
91 | } | ||
92 | |||
93 | static ssize_t show_online(struct device *dev, | ||
94 | struct device_attribute *attr, | ||
95 | char *buf) | ||
96 | { | ||
97 | struct pcpu *cpu = container_of(dev, struct pcpu, dev); | ||
98 | |||
99 | return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE)); | ||
100 | } | ||
101 | |||
102 | static ssize_t __ref store_online(struct device *dev, | ||
103 | struct device_attribute *attr, | ||
104 | const char *buf, size_t count) | ||
105 | { | ||
106 | struct pcpu *pcpu = container_of(dev, struct pcpu, dev); | ||
107 | unsigned long long val; | ||
108 | ssize_t ret; | ||
109 | |||
110 | if (!capable(CAP_SYS_ADMIN)) | ||
111 | return -EPERM; | ||
112 | |||
113 | if (kstrtoull(buf, 0, &val) < 0) | ||
114 | return -EINVAL; | ||
115 | |||
116 | switch (val) { | ||
117 | case 0: | ||
118 | ret = xen_pcpu_down(pcpu->cpu_id); | ||
119 | break; | ||
120 | case 1: | ||
121 | ret = xen_pcpu_up(pcpu->cpu_id); | ||
122 | break; | ||
123 | default: | ||
124 | ret = -EINVAL; | ||
125 | } | ||
126 | |||
127 | if (ret >= 0) | ||
128 | ret = count; | ||
129 | return ret; | ||
130 | } | ||
131 | static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); | ||
132 | |||
133 | static bool xen_pcpu_online(uint32_t flags) | ||
134 | { | ||
135 | return !!(flags & XEN_PCPU_FLAGS_ONLINE); | ||
136 | } | ||
137 | |||
138 | static void pcpu_online_status(struct xenpf_pcpuinfo *info, | ||
139 | struct pcpu *pcpu) | ||
140 | { | ||
141 | if (xen_pcpu_online(info->flags) && | ||
142 | !xen_pcpu_online(pcpu->flags)) { | ||
143 | /* the pcpu is onlined */ | ||
144 | pcpu->flags |= XEN_PCPU_FLAGS_ONLINE; | ||
145 | kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE); | ||
146 | } else if (!xen_pcpu_online(info->flags) && | ||
147 | xen_pcpu_online(pcpu->flags)) { | ||
148 | /* The pcpu is offlined */ | ||
149 | pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE; | ||
150 | kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | static struct pcpu *get_pcpu(uint32_t cpu_id) | ||
155 | { | ||
156 | struct pcpu *pcpu; | ||
157 | |||
158 | list_for_each_entry(pcpu, &xen_pcpus, list) { | ||
159 | if (pcpu->cpu_id == cpu_id) | ||
160 | return pcpu; | ||
161 | } | ||
162 | |||
163 | return NULL; | ||
164 | } | ||
165 | |||
166 | static void pcpu_release(struct device *dev) | ||
167 | { | ||
168 | struct pcpu *pcpu = container_of(dev, struct pcpu, dev); | ||
169 | |||
170 | list_del(&pcpu->list); | ||
171 | kfree(pcpu); | ||
172 | } | ||
173 | |||
174 | static void unregister_and_remove_pcpu(struct pcpu *pcpu) | ||
175 | { | ||
176 | struct device *dev; | ||
177 | |||
178 | if (!pcpu) | ||
179 | return; | ||
180 | |||
181 | dev = &pcpu->dev; | ||
182 | if (dev->id) | ||
183 | device_remove_file(dev, &dev_attr_online); | ||
184 | |||
185 | /* pcpu remove would be implicitly done */ | ||
186 | device_unregister(dev); | ||
187 | } | ||
188 | |||
189 | static int register_pcpu(struct pcpu *pcpu) | ||
190 | { | ||
191 | struct device *dev; | ||
192 | int err = -EINVAL; | ||
193 | |||
194 | if (!pcpu) | ||
195 | return err; | ||
196 | |||
197 | dev = &pcpu->dev; | ||
198 | dev->bus = &xen_pcpu_subsys; | ||
199 | dev->id = pcpu->cpu_id; | ||
200 | dev->release = pcpu_release; | ||
201 | |||
202 | err = device_register(dev); | ||
203 | if (err) { | ||
204 | pcpu_release(dev); | ||
205 | return err; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Xen never offline cpu0 due to several restrictions | ||
210 | * and assumptions. This basically doesn't add a sys control | ||
211 | * to user, one cannot attempt to offline BSP. | ||
212 | */ | ||
213 | if (dev->id) { | ||
214 | err = device_create_file(dev, &dev_attr_online); | ||
215 | if (err) { | ||
216 | device_unregister(dev); | ||
217 | return err; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info) | ||
225 | { | ||
226 | struct pcpu *pcpu; | ||
227 | int err; | ||
228 | |||
229 | if (info->flags & XEN_PCPU_FLAGS_INVALID) | ||
230 | return ERR_PTR(-ENODEV); | ||
231 | |||
232 | pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL); | ||
233 | if (!pcpu) | ||
234 | return ERR_PTR(-ENOMEM); | ||
235 | |||
236 | INIT_LIST_HEAD(&pcpu->list); | ||
237 | pcpu->cpu_id = info->xen_cpuid; | ||
238 | pcpu->flags = info->flags; | ||
239 | |||
240 | /* Need hold on xen_pcpu_lock before pcpu list manipulations */ | ||
241 | list_add_tail(&pcpu->list, &xen_pcpus); | ||
242 | |||
243 | err = register_pcpu(pcpu); | ||
244 | if (err) { | ||
245 | pr_warning(XEN_PCPU "Failed to register pcpu%u\n", | ||
246 | info->xen_cpuid); | ||
247 | return ERR_PTR(-ENOENT); | ||
248 | } | ||
249 | |||
250 | return pcpu; | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * Caller should hold the xen_pcpu_lock | ||
255 | */ | ||
256 | static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) | ||
257 | { | ||
258 | int ret; | ||
259 | struct pcpu *pcpu = NULL; | ||
260 | struct xenpf_pcpuinfo *info; | ||
261 | struct xen_platform_op op = { | ||
262 | .cmd = XENPF_get_cpuinfo, | ||
263 | .interface_version = XENPF_INTERFACE_VERSION, | ||
264 | .u.pcpu_info.xen_cpuid = cpu, | ||
265 | }; | ||
266 | |||
267 | ret = HYPERVISOR_dom0_op(&op); | ||
268 | if (ret) | ||
269 | return ret; | ||
270 | |||
271 | info = &op.u.pcpu_info; | ||
272 | if (max_cpu) | ||
273 | *max_cpu = info->max_present; | ||
274 | |||
275 | pcpu = get_pcpu(cpu); | ||
276 | |||
277 | /* | ||
278 | * Only those at cpu present map has its sys interface. | ||
279 | */ | ||
280 | if (info->flags & XEN_PCPU_FLAGS_INVALID) { | ||
281 | if (pcpu) | ||
282 | unregister_and_remove_pcpu(pcpu); | ||
283 | return 0; | ||
284 | } | ||
285 | |||
286 | if (!pcpu) { | ||
287 | pcpu = create_and_register_pcpu(info); | ||
288 | if (IS_ERR_OR_NULL(pcpu)) | ||
289 | return -ENODEV; | ||
290 | } else | ||
291 | pcpu_online_status(info, pcpu); | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * Sync dom0's pcpu information with xen hypervisor's | ||
298 | */ | ||
299 | static int xen_sync_pcpus(void) | ||
300 | { | ||
301 | /* | ||
302 | * Boot cpu always have cpu_id 0 in xen | ||
303 | */ | ||
304 | uint32_t cpu = 0, max_cpu = 0; | ||
305 | int err = 0; | ||
306 | struct pcpu *pcpu, *tmp; | ||
307 | |||
308 | mutex_lock(&xen_pcpu_lock); | ||
309 | |||
310 | while (!err && (cpu <= max_cpu)) { | ||
311 | err = sync_pcpu(cpu, &max_cpu); | ||
312 | cpu++; | ||
313 | } | ||
314 | |||
315 | if (err) | ||
316 | list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list) | ||
317 | unregister_and_remove_pcpu(pcpu); | ||
318 | |||
319 | mutex_unlock(&xen_pcpu_lock); | ||
320 | |||
321 | return err; | ||
322 | } | ||
323 | |||
/* Work item body: re-sync the pcpu list; the result is not propagated. */
static void xen_pcpu_work_fn(struct work_struct *work)
{
	(void)xen_sync_pcpus();
}
static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn);
329 | |||
330 | static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id) | ||
331 | { | ||
332 | schedule_work(&xen_pcpu_work); | ||
333 | return IRQ_HANDLED; | ||
334 | } | ||
335 | |||
336 | static int __init xen_pcpu_init(void) | ||
337 | { | ||
338 | int irq, ret; | ||
339 | |||
340 | if (!xen_initial_domain()) | ||
341 | return -ENODEV; | ||
342 | |||
343 | irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0, | ||
344 | xen_pcpu_interrupt, 0, | ||
345 | "xen-pcpu", NULL); | ||
346 | if (irq < 0) { | ||
347 | pr_warning(XEN_PCPU "Failed to bind pcpu virq\n"); | ||
348 | return irq; | ||
349 | } | ||
350 | |||
351 | ret = subsys_system_register(&xen_pcpu_subsys, NULL); | ||
352 | if (ret) { | ||
353 | pr_warning(XEN_PCPU "Failed to register pcpu subsys\n"); | ||
354 | goto err1; | ||
355 | } | ||
356 | |||
357 | ret = xen_sync_pcpus(); | ||
358 | if (ret) { | ||
359 | pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); | ||
360 | goto err2; | ||
361 | } | ||
362 | |||
363 | return 0; | ||
364 | |||
365 | err2: | ||
366 | bus_unregister(&xen_pcpu_subsys); | ||
367 | err1: | ||
368 | unbind_from_irqhandler(irq, NULL); | ||
369 | return ret; | ||
370 | } | ||
371 | arch_initcall(xen_pcpu_init); | ||