diff options
-rw-r--r-- | Documentation/feature-removal-schedule.txt | 28 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 2 | ||||
-rw-r--r-- | arch/x86/Kconfig | 19 | ||||
-rw-r--r-- | arch/x86/include/asm/vmi.h | 269 | ||||
-rw-r--r-- | arch/x86/include/asm/vmi_time.h | 98 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmi_32.c | 893 | ||||
-rw-r--r-- | arch/x86/kernel/vmiclock_32.c | 317 |
10 files changed, 5 insertions, 1636 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 842aa9de84a6..5e2bc4ab897a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -386,34 +386,6 @@ Who: Tejun Heo <tj@kernel.org> | |||
386 | 386 | ||
387 | ---------------------------- | 387 | ---------------------------- |
388 | 388 | ||
389 | What: Support for VMware's guest paravirtuliazation technique [VMI] will be | ||
390 | dropped. | ||
391 | When: 2.6.37 or earlier. | ||
392 | Why: With the recent innovations in CPU hardware acceleration technologies | ||
393 | from Intel and AMD, VMware ran a few experiments to compare these | ||
394 | techniques to guest paravirtualization technique on VMware's platform. | ||
395 | These hardware assisted virtualization techniques have outperformed the | ||
396 | performance benefits provided by VMI in most of the workloads. VMware | ||
397 | expects that these hardware features will be ubiquitous in a couple of | ||
398 | years, as a result, VMware has started a phased retirement of this | ||
399 | feature from the hypervisor. We will be removing this feature from the | ||
400 | Kernel too. Right now we are targeting 2.6.37 but can retire earlier if | ||
401 | technical reasons (read opportunity to remove major chunk of pvops) | ||
402 | arise. | ||
403 | |||
404 | Please note that VMI has always been an optimization and non-VMI kernels | ||
405 | still work fine on VMware's platform. | ||
406 | Latest versions of VMware's product which support VMI are, | ||
407 | Workstation 7.0 and VSphere 4.0 on ESX side, future maintainence | ||
408 | releases for these products will continue supporting VMI. | ||
409 | |||
410 | For more details about VMI retirement take a look at this, | ||
411 | http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html | ||
412 | |||
413 | Who: Alok N Kataria <akataria@vmware.com> | ||
414 | |||
415 | ---------------------------- | ||
416 | |||
417 | What: Support for lcd_switch and display_get in asus-laptop driver | 389 | What: Support for lcd_switch and display_get in asus-laptop driver |
418 | When: March 2010 | 390 | When: March 2010 |
419 | Why: These two features use non-standard interfaces. There are the | 391 | Why: These two features use non-standard interfaces. There are the |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2c85c0692b01..582409801a41 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -455,7 +455,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
455 | [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, | 455 | [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, |
456 | pxa_timer,timer3,32k_counter,timer0_1 | 456 | pxa_timer,timer3,32k_counter,timer0_1 |
457 | [AVR32] avr32 | 457 | [AVR32] avr32 |
458 | [X86-32] pit,hpet,tsc,vmi-timer; | 458 | [X86-32] pit,hpet,tsc; |
459 | scx200_hrt on Geode; cyclone on IBM x440 | 459 | scx200_hrt on Geode; cyclone on IBM x440 |
460 | [MIPS] MIPS | 460 | [MIPS] MIPS |
461 | [PARISC] cr16 | 461 | [PARISC] cr16 |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cea0cd9a316f..f0ee331feeab 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -517,25 +517,6 @@ if PARAVIRT_GUEST | |||
517 | 517 | ||
518 | source "arch/x86/xen/Kconfig" | 518 | source "arch/x86/xen/Kconfig" |
519 | 519 | ||
520 | config VMI | ||
521 | bool "VMI Guest support (DEPRECATED)" | ||
522 | select PARAVIRT | ||
523 | depends on X86_32 | ||
524 | ---help--- | ||
525 | VMI provides a paravirtualized interface to the VMware ESX server | ||
526 | (it could be used by other hypervisors in theory too, but is not | ||
527 | at the moment), by linking the kernel to a GPL-ed ROM module | ||
528 | provided by the hypervisor. | ||
529 | |||
530 | As of September 2009, VMware has started a phased retirement | ||
531 | of this feature from VMware's products. Please see | ||
532 | feature-removal-schedule.txt for details. If you are | ||
533 | planning to enable this option, please note that you cannot | ||
534 | live migrate a VMI enabled VM to a future VMware product, | ||
535 | which doesn't support VMI. So if you expect your kernel to | ||
536 | seamlessly migrate to newer VMware products, keep this | ||
537 | disabled. | ||
538 | |||
539 | config KVM_CLOCK | 520 | config KVM_CLOCK |
540 | bool "KVM paravirtualized clock" | 521 | bool "KVM paravirtualized clock" |
541 | select PARAVIRT | 522 | select PARAVIRT |
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h deleted file mode 100644 index 61e08c0a2907..000000000000 --- a/arch/x86/include/asm/vmi.h +++ /dev/null | |||
@@ -1,269 +0,0 @@ | |||
1 | /* | ||
2 | * VMI interface definition | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Maintained by: Zachary Amsden zach@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | #include <linux/types.h> | ||
25 | |||
26 | /* | ||
27 | *--------------------------------------------------------------------- | ||
28 | * | ||
29 | * VMI Option ROM API | ||
30 | * | ||
31 | *--------------------------------------------------------------------- | ||
32 | */ | ||
33 | #define VMI_SIGNATURE 0x696d5663 /* "cVmi" */ | ||
34 | |||
35 | #define PCI_VENDOR_ID_VMWARE 0x15AD | ||
36 | #define PCI_DEVICE_ID_VMWARE_VMI 0x0801 | ||
37 | |||
38 | /* | ||
39 | * We use two version numbers for compatibility, with the major | ||
40 | * number signifying interface breakages, and the minor number | ||
41 | * interface extensions. | ||
42 | */ | ||
43 | #define VMI_API_REV_MAJOR 3 | ||
44 | #define VMI_API_REV_MINOR 0 | ||
45 | |||
46 | #define VMI_CALL_CPUID 0 | ||
47 | #define VMI_CALL_WRMSR 1 | ||
48 | #define VMI_CALL_RDMSR 2 | ||
49 | #define VMI_CALL_SetGDT 3 | ||
50 | #define VMI_CALL_SetLDT 4 | ||
51 | #define VMI_CALL_SetIDT 5 | ||
52 | #define VMI_CALL_SetTR 6 | ||
53 | #define VMI_CALL_GetGDT 7 | ||
54 | #define VMI_CALL_GetLDT 8 | ||
55 | #define VMI_CALL_GetIDT 9 | ||
56 | #define VMI_CALL_GetTR 10 | ||
57 | #define VMI_CALL_WriteGDTEntry 11 | ||
58 | #define VMI_CALL_WriteLDTEntry 12 | ||
59 | #define VMI_CALL_WriteIDTEntry 13 | ||
60 | #define VMI_CALL_UpdateKernelStack 14 | ||
61 | #define VMI_CALL_SetCR0 15 | ||
62 | #define VMI_CALL_SetCR2 16 | ||
63 | #define VMI_CALL_SetCR3 17 | ||
64 | #define VMI_CALL_SetCR4 18 | ||
65 | #define VMI_CALL_GetCR0 19 | ||
66 | #define VMI_CALL_GetCR2 20 | ||
67 | #define VMI_CALL_GetCR3 21 | ||
68 | #define VMI_CALL_GetCR4 22 | ||
69 | #define VMI_CALL_WBINVD 23 | ||
70 | #define VMI_CALL_SetDR 24 | ||
71 | #define VMI_CALL_GetDR 25 | ||
72 | #define VMI_CALL_RDPMC 26 | ||
73 | #define VMI_CALL_RDTSC 27 | ||
74 | #define VMI_CALL_CLTS 28 | ||
75 | #define VMI_CALL_EnableInterrupts 29 | ||
76 | #define VMI_CALL_DisableInterrupts 30 | ||
77 | #define VMI_CALL_GetInterruptMask 31 | ||
78 | #define VMI_CALL_SetInterruptMask 32 | ||
79 | #define VMI_CALL_IRET 33 | ||
80 | #define VMI_CALL_SYSEXIT 34 | ||
81 | #define VMI_CALL_Halt 35 | ||
82 | #define VMI_CALL_Reboot 36 | ||
83 | #define VMI_CALL_Shutdown 37 | ||
84 | #define VMI_CALL_SetPxE 38 | ||
85 | #define VMI_CALL_SetPxELong 39 | ||
86 | #define VMI_CALL_UpdatePxE 40 | ||
87 | #define VMI_CALL_UpdatePxELong 41 | ||
88 | #define VMI_CALL_MachineToPhysical 42 | ||
89 | #define VMI_CALL_PhysicalToMachine 43 | ||
90 | #define VMI_CALL_AllocatePage 44 | ||
91 | #define VMI_CALL_ReleasePage 45 | ||
92 | #define VMI_CALL_InvalPage 46 | ||
93 | #define VMI_CALL_FlushTLB 47 | ||
94 | #define VMI_CALL_SetLinearMapping 48 | ||
95 | |||
96 | #define VMI_CALL_SetIOPLMask 61 | ||
97 | #define VMI_CALL_SetInitialAPState 62 | ||
98 | #define VMI_CALL_APICWrite 63 | ||
99 | #define VMI_CALL_APICRead 64 | ||
100 | #define VMI_CALL_IODelay 65 | ||
101 | #define VMI_CALL_SetLazyMode 73 | ||
102 | |||
103 | /* | ||
104 | *--------------------------------------------------------------------- | ||
105 | * | ||
106 | * MMU operation flags | ||
107 | * | ||
108 | *--------------------------------------------------------------------- | ||
109 | */ | ||
110 | |||
111 | /* Flags used by VMI_{Allocate|Release}Page call */ | ||
112 | #define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */ | ||
113 | #define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */ | ||
114 | #define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */ | ||
115 | |||
116 | |||
117 | /* Flags shared by Allocate|Release Page and PTE updates */ | ||
118 | #define VMI_PAGE_PT 0x01 | ||
119 | #define VMI_PAGE_PD 0x02 | ||
120 | #define VMI_PAGE_PDP 0x04 | ||
121 | #define VMI_PAGE_PML4 0x08 | ||
122 | |||
123 | #define VMI_PAGE_NORMAL 0x00 /* for debugging */ | ||
124 | |||
125 | /* Flags used by PTE updates */ | ||
126 | #define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */ | ||
127 | #define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */ | ||
128 | #define VMI_PAGE_VA_MASK 0xfffff000 | ||
129 | |||
130 | #ifdef CONFIG_X86_PAE | ||
131 | #define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED) | ||
132 | #define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED) | ||
133 | #else | ||
134 | #define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED) | ||
135 | #define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED) | ||
136 | #endif | ||
137 | |||
138 | /* Flags used by VMI_FlushTLB call */ | ||
139 | #define VMI_FLUSH_TLB 0x01 | ||
140 | #define VMI_FLUSH_GLOBAL 0x02 | ||
141 | |||
142 | /* | ||
143 | *--------------------------------------------------------------------- | ||
144 | * | ||
145 | * VMI relocation definitions for ROM call get_reloc | ||
146 | * | ||
147 | *--------------------------------------------------------------------- | ||
148 | */ | ||
149 | |||
150 | /* VMI Relocation types */ | ||
151 | #define VMI_RELOCATION_NONE 0 | ||
152 | #define VMI_RELOCATION_CALL_REL 1 | ||
153 | #define VMI_RELOCATION_JUMP_REL 2 | ||
154 | #define VMI_RELOCATION_NOP 3 | ||
155 | |||
156 | #ifndef __ASSEMBLY__ | ||
157 | struct vmi_relocation_info { | ||
158 | unsigned char *eip; | ||
159 | unsigned char type; | ||
160 | unsigned char reserved[3]; | ||
161 | }; | ||
162 | #endif | ||
163 | |||
164 | |||
165 | /* | ||
166 | *--------------------------------------------------------------------- | ||
167 | * | ||
168 | * Generic ROM structures and definitions | ||
169 | * | ||
170 | *--------------------------------------------------------------------- | ||
171 | */ | ||
172 | |||
173 | #ifndef __ASSEMBLY__ | ||
174 | |||
175 | struct vrom_header { | ||
176 | u16 rom_signature; /* option ROM signature */ | ||
177 | u8 rom_length; /* ROM length in 512 byte chunks */ | ||
178 | u8 rom_entry[4]; /* 16-bit code entry point */ | ||
179 | u8 rom_pad0; /* 4-byte align pad */ | ||
180 | u32 vrom_signature; /* VROM identification signature */ | ||
181 | u8 api_version_min;/* Minor version of API */ | ||
182 | u8 api_version_maj;/* Major version of API */ | ||
183 | u8 jump_slots; /* Number of jump slots */ | ||
184 | u8 reserved1; /* Reserved for expansion */ | ||
185 | u32 virtual_top; /* Hypervisor virtual address start */ | ||
186 | u16 reserved2; /* Reserved for expansion */ | ||
187 | u16 license_offs; /* Offset to License string */ | ||
188 | u16 pci_header_offs;/* Offset to PCI OPROM header */ | ||
189 | u16 pnp_header_offs;/* Offset to PnP OPROM header */ | ||
190 | u32 rom_pad3; /* PnP reserverd / VMI reserved */ | ||
191 | u8 reserved[96]; /* Reserved for headers */ | ||
192 | char vmi_init[8]; /* VMI_Init jump point */ | ||
193 | char get_reloc[8]; /* VMI_GetRelocationInfo jump point */ | ||
194 | } __attribute__((packed)); | ||
195 | |||
196 | struct pnp_header { | ||
197 | char sig[4]; | ||
198 | char rev; | ||
199 | char size; | ||
200 | short next; | ||
201 | short res; | ||
202 | long devID; | ||
203 | unsigned short manufacturer_offset; | ||
204 | unsigned short product_offset; | ||
205 | } __attribute__((packed)); | ||
206 | |||
207 | struct pci_header { | ||
208 | char sig[4]; | ||
209 | short vendorID; | ||
210 | short deviceID; | ||
211 | short vpdData; | ||
212 | short size; | ||
213 | char rev; | ||
214 | char class; | ||
215 | char subclass; | ||
216 | char interface; | ||
217 | short chunks; | ||
218 | char rom_version_min; | ||
219 | char rom_version_maj; | ||
220 | char codetype; | ||
221 | char lastRom; | ||
222 | short reserved; | ||
223 | } __attribute__((packed)); | ||
224 | |||
225 | /* Function prototypes for bootstrapping */ | ||
226 | #ifdef CONFIG_VMI | ||
227 | extern void vmi_init(void); | ||
228 | extern void vmi_activate(void); | ||
229 | extern void vmi_bringup(void); | ||
230 | #else | ||
231 | static inline void vmi_init(void) {} | ||
232 | static inline void vmi_activate(void) {} | ||
233 | static inline void vmi_bringup(void) {} | ||
234 | #endif | ||
235 | |||
236 | /* State needed to start an application processor in an SMP system. */ | ||
237 | struct vmi_ap_state { | ||
238 | u32 cr0; | ||
239 | u32 cr2; | ||
240 | u32 cr3; | ||
241 | u32 cr4; | ||
242 | |||
243 | u64 efer; | ||
244 | |||
245 | u32 eip; | ||
246 | u32 eflags; | ||
247 | u32 eax; | ||
248 | u32 ebx; | ||
249 | u32 ecx; | ||
250 | u32 edx; | ||
251 | u32 esp; | ||
252 | u32 ebp; | ||
253 | u32 esi; | ||
254 | u32 edi; | ||
255 | u16 cs; | ||
256 | u16 ss; | ||
257 | u16 ds; | ||
258 | u16 es; | ||
259 | u16 fs; | ||
260 | u16 gs; | ||
261 | u16 ldtr; | ||
262 | |||
263 | u16 gdtr_limit; | ||
264 | u32 gdtr_base; | ||
265 | u32 idtr_base; | ||
266 | u16 idtr_limit; | ||
267 | }; | ||
268 | |||
269 | #endif | ||
diff --git a/arch/x86/include/asm/vmi_time.h b/arch/x86/include/asm/vmi_time.h deleted file mode 100644 index c6e0bee93e3c..000000000000 --- a/arch/x86/include/asm/vmi_time.h +++ /dev/null | |||
@@ -1,98 +0,0 @@ | |||
1 | /* | ||
2 | * VMI Time wrappers | ||
3 | * | ||
4 | * Copyright (C) 2006, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifndef _ASM_X86_VMI_TIME_H | ||
26 | #define _ASM_X86_VMI_TIME_H | ||
27 | |||
28 | /* | ||
29 | * Raw VMI call indices for timer functions | ||
30 | */ | ||
31 | #define VMI_CALL_GetCycleFrequency 66 | ||
32 | #define VMI_CALL_GetCycleCounter 67 | ||
33 | #define VMI_CALL_SetAlarm 68 | ||
34 | #define VMI_CALL_CancelAlarm 69 | ||
35 | #define VMI_CALL_GetWallclockTime 70 | ||
36 | #define VMI_CALL_WallclockUpdated 71 | ||
37 | |||
38 | /* Cached VMI timer operations */ | ||
39 | extern struct vmi_timer_ops { | ||
40 | u64 (*get_cycle_frequency)(void); | ||
41 | u64 (*get_cycle_counter)(int); | ||
42 | u64 (*get_wallclock)(void); | ||
43 | int (*wallclock_updated)(void); | ||
44 | void (*set_alarm)(u32 flags, u64 expiry, u64 period); | ||
45 | void (*cancel_alarm)(u32 flags); | ||
46 | } vmi_timer_ops; | ||
47 | |||
48 | /* Prototypes */ | ||
49 | extern void __init vmi_time_init(void); | ||
50 | extern unsigned long vmi_get_wallclock(void); | ||
51 | extern int vmi_set_wallclock(unsigned long now); | ||
52 | extern unsigned long long vmi_sched_clock(void); | ||
53 | extern unsigned long vmi_tsc_khz(void); | ||
54 | |||
55 | #ifdef CONFIG_X86_LOCAL_APIC | ||
56 | extern void __devinit vmi_time_bsp_init(void); | ||
57 | extern void __devinit vmi_time_ap_init(void); | ||
58 | #endif | ||
59 | |||
60 | /* | ||
61 | * When run under a hypervisor, a vcpu is always in one of three states: | ||
62 | * running, halted, or ready. The vcpu is in the 'running' state if it | ||
63 | * is executing. When the vcpu executes the halt interface, the vcpu | ||
64 | * enters the 'halted' state and remains halted until there is some work | ||
65 | * pending for the vcpu (e.g. an alarm expires, host I/O completes on | ||
66 | * behalf of virtual I/O). At this point, the vcpu enters the 'ready' | ||
67 | * state (waiting for the hypervisor to reschedule it). Finally, at any | ||
68 | * time when the vcpu is not in the 'running' state nor the 'halted' | ||
69 | * state, it is in the 'ready' state. | ||
70 | * | ||
71 | * Real time is advances while the vcpu is 'running', 'ready', or | ||
72 | * 'halted'. Stolen time is the time in which the vcpu is in the | ||
73 | * 'ready' state. Available time is the remaining time -- the vcpu is | ||
74 | * either 'running' or 'halted'. | ||
75 | * | ||
76 | * All three views of time are accessible through the VMI cycle | ||
77 | * counters. | ||
78 | */ | ||
79 | |||
80 | /* The cycle counters. */ | ||
81 | #define VMI_CYCLES_REAL 0 | ||
82 | #define VMI_CYCLES_AVAILABLE 1 | ||
83 | #define VMI_CYCLES_STOLEN 2 | ||
84 | |||
85 | /* The alarm interface 'flags' bits */ | ||
86 | #define VMI_ALARM_COUNTERS 2 | ||
87 | |||
88 | #define VMI_ALARM_COUNTER_MASK 0x000000ff | ||
89 | |||
90 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
91 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
92 | |||
93 | #define VMI_ALARM_IS_ONESHOT 0x00000000 | ||
94 | #define VMI_ALARM_IS_PERIODIC 0x00000100 | ||
95 | |||
96 | #define CONFIG_VMI_ALARM_HZ 100 | ||
97 | |||
98 | #endif /* _ASM_X86_VMI_TIME_H */ | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0925676266bd..801127cd9754 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -91,7 +91,6 @@ obj-$(CONFIG_K8_NB) += k8.o | |||
91 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 91 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
92 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 92 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
93 | 93 | ||
94 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | ||
95 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 94 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
96 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 95 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
97 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 96 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c3a4fbb2b996..feb4c21e499a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -83,7 +83,6 @@ | |||
83 | #include <asm/dmi.h> | 83 | #include <asm/dmi.h> |
84 | #include <asm/io_apic.h> | 84 | #include <asm/io_apic.h> |
85 | #include <asm/ist.h> | 85 | #include <asm/ist.h> |
86 | #include <asm/vmi.h> | ||
87 | #include <asm/setup_arch.h> | 86 | #include <asm/setup_arch.h> |
88 | #include <asm/bios_ebda.h> | 87 | #include <asm/bios_ebda.h> |
89 | #include <asm/cacheflush.h> | 88 | #include <asm/cacheflush.h> |
@@ -734,10 +733,10 @@ void __init setup_arch(char **cmdline_p) | |||
734 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 733 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
735 | #endif | 734 | #endif |
736 | 735 | ||
737 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 736 | /* |
738 | vmi_init(); | 737 | * If we have OLPC OFW, we might end up relocating the fixmap due to |
739 | 738 | * reserve_top(), so do this before touching the ioremap area. | |
740 | /* OFW also may relocate the fixmap */ | 739 | */ |
741 | olpc_ofw_detect(); | 740 | olpc_ofw_detect(); |
742 | 741 | ||
743 | early_trap_init(); | 742 | early_trap_init(); |
@@ -838,9 +837,6 @@ void __init setup_arch(char **cmdline_p) | |||
838 | 837 | ||
839 | x86_report_nx(); | 838 | x86_report_nx(); |
840 | 839 | ||
841 | /* Must be before kernel pagetables are setup */ | ||
842 | vmi_activate(); | ||
843 | |||
844 | /* after early param, so could get panic from serial */ | 840 | /* after early param, so could get panic from serial */ |
845 | reserve_early_setup_data(); | 841 | reserve_early_setup_data(); |
846 | 842 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b3bfc4dd708..63a1a5596ac0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -62,7 +62,6 @@ | |||
62 | #include <asm/pgtable.h> | 62 | #include <asm/pgtable.h> |
63 | #include <asm/tlbflush.h> | 63 | #include <asm/tlbflush.h> |
64 | #include <asm/mtrr.h> | 64 | #include <asm/mtrr.h> |
65 | #include <asm/vmi.h> | ||
66 | #include <asm/apic.h> | 65 | #include <asm/apic.h> |
67 | #include <asm/setup.h> | 66 | #include <asm/setup.h> |
68 | #include <asm/uv/uv.h> | 67 | #include <asm/uv/uv.h> |
@@ -311,7 +310,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
311 | __flush_tlb_all(); | 310 | __flush_tlb_all(); |
312 | #endif | 311 | #endif |
313 | 312 | ||
314 | vmi_bringup(); | ||
315 | cpu_init(); | 313 | cpu_init(); |
316 | preempt_disable(); | 314 | preempt_disable(); |
317 | smp_callin(); | 315 | smp_callin(); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c deleted file mode 100644 index ce9fbacb7526..000000000000 --- a/arch/x86/kernel/vmi_32.c +++ /dev/null | |||
@@ -1,893 +0,0 @@ | |||
1 | /* | ||
2 | * VMI specific paravirt-ops implementation | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to zach@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/module.h> | ||
26 | #include <linux/cpu.h> | ||
27 | #include <linux/bootmem.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/highmem.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/gfp.h> | ||
32 | #include <asm/vmi.h> | ||
33 | #include <asm/io.h> | ||
34 | #include <asm/fixmap.h> | ||
35 | #include <asm/apicdef.h> | ||
36 | #include <asm/apic.h> | ||
37 | #include <asm/pgalloc.h> | ||
38 | #include <asm/processor.h> | ||
39 | #include <asm/timer.h> | ||
40 | #include <asm/vmi_time.h> | ||
41 | #include <asm/kmap_types.h> | ||
42 | #include <asm/setup.h> | ||
43 | |||
44 | /* Convenient for calling VMI functions indirectly in the ROM */ | ||
45 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | ||
46 | typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); | ||
47 | |||
48 | #define call_vrom_func(rom,func) \ | ||
49 | (((VROMFUNC *)(rom->func))()) | ||
50 | |||
51 | #define call_vrom_long_func(rom,func,arg) \ | ||
52 | (((VROMLONGFUNC *)(rom->func)) (arg)) | ||
53 | |||
54 | static struct vrom_header *vmi_rom; | ||
55 | static int disable_pge; | ||
56 | static int disable_pse; | ||
57 | static int disable_sep; | ||
58 | static int disable_tsc; | ||
59 | static int disable_mtrr; | ||
60 | static int disable_noidle; | ||
61 | static int disable_vmi_timer; | ||
62 | |||
63 | /* Cached VMI operations */ | ||
64 | static struct { | ||
65 | void (*cpuid)(void /* non-c */); | ||
66 | void (*_set_ldt)(u32 selector); | ||
67 | void (*set_tr)(u32 selector); | ||
68 | void (*write_idt_entry)(struct desc_struct *, int, u32, u32); | ||
69 | void (*write_gdt_entry)(struct desc_struct *, int, u32, u32); | ||
70 | void (*write_ldt_entry)(struct desc_struct *, int, u32, u32); | ||
71 | void (*set_kernel_stack)(u32 selector, u32 sp0); | ||
72 | void (*allocate_page)(u32, u32, u32, u32, u32); | ||
73 | void (*release_page)(u32, u32); | ||
74 | void (*set_pte)(pte_t, pte_t *, unsigned); | ||
75 | void (*update_pte)(pte_t *, unsigned); | ||
76 | void (*set_linear_mapping)(int, void *, u32, u32); | ||
77 | void (*_flush_tlb)(int); | ||
78 | void (*set_initial_ap_state)(int, int); | ||
79 | void (*halt)(void); | ||
80 | void (*set_lazy_mode)(int mode); | ||
81 | } vmi_ops; | ||
82 | |||
83 | /* Cached VMI operations */ | ||
84 | struct vmi_timer_ops vmi_timer_ops; | ||
85 | |||
86 | /* | ||
87 | * VMI patching routines. | ||
88 | */ | ||
89 | #define MNEM_CALL 0xe8 | ||
90 | #define MNEM_JMP 0xe9 | ||
91 | #define MNEM_RET 0xc3 | ||
92 | |||
93 | #define IRQ_PATCH_INT_MASK 0 | ||
94 | #define IRQ_PATCH_DISABLE 5 | ||
95 | |||
96 | static inline void patch_offset(void *insnbuf, | ||
97 | unsigned long ip, unsigned long dest) | ||
98 | { | ||
99 | *(unsigned long *)(insnbuf+1) = dest-ip-5; | ||
100 | } | ||
101 | |||
102 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, | ||
103 | unsigned long ip) | ||
104 | { | ||
105 | u64 reloc; | ||
106 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | ||
107 | reloc = call_vrom_long_func(vmi_rom, get_reloc, call); | ||
108 | switch(rel->type) { | ||
109 | case VMI_RELOCATION_CALL_REL: | ||
110 | BUG_ON(len < 5); | ||
111 | *(char *)insnbuf = MNEM_CALL; | ||
112 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); | ||
113 | return 5; | ||
114 | |||
115 | case VMI_RELOCATION_JUMP_REL: | ||
116 | BUG_ON(len < 5); | ||
117 | *(char *)insnbuf = MNEM_JMP; | ||
118 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); | ||
119 | return 5; | ||
120 | |||
121 | case VMI_RELOCATION_NOP: | ||
122 | /* obliterate the whole thing */ | ||
123 | return 0; | ||
124 | |||
125 | case VMI_RELOCATION_NONE: | ||
126 | /* leave native code in place */ | ||
127 | break; | ||
128 | |||
129 | default: | ||
130 | BUG(); | ||
131 | } | ||
132 | return len; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Apply patch if appropriate, return length of new instruction | ||
137 | * sequence. The callee does nop padding for us. | ||
138 | */ | ||
139 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | ||
140 | unsigned long ip, unsigned len) | ||
141 | { | ||
142 | switch (type) { | ||
143 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): | ||
144 | return patch_internal(VMI_CALL_DisableInterrupts, len, | ||
145 | insns, ip); | ||
146 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): | ||
147 | return patch_internal(VMI_CALL_EnableInterrupts, len, | ||
148 | insns, ip); | ||
149 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): | ||
150 | return patch_internal(VMI_CALL_SetInterruptMask, len, | ||
151 | insns, ip); | ||
152 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): | ||
153 | return patch_internal(VMI_CALL_GetInterruptMask, len, | ||
154 | insns, ip); | ||
155 | case PARAVIRT_PATCH(pv_cpu_ops.iret): | ||
156 | return patch_internal(VMI_CALL_IRET, len, insns, ip); | ||
157 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): | ||
158 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); | ||
159 | default: | ||
160 | break; | ||
161 | } | ||
162 | return len; | ||
163 | } | ||
164 | |||
165 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | ||
166 | static void vmi_cpuid(unsigned int *ax, unsigned int *bx, | ||
167 | unsigned int *cx, unsigned int *dx) | ||
168 | { | ||
169 | int override = 0; | ||
170 | if (*ax == 1) | ||
171 | override = 1; | ||
172 | asm volatile ("call *%6" | ||
173 | : "=a" (*ax), | ||
174 | "=b" (*bx), | ||
175 | "=c" (*cx), | ||
176 | "=d" (*dx) | ||
177 | : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid)); | ||
178 | if (override) { | ||
179 | if (disable_pse) | ||
180 | *dx &= ~X86_FEATURE_PSE; | ||
181 | if (disable_pge) | ||
182 | *dx &= ~X86_FEATURE_PGE; | ||
183 | if (disable_sep) | ||
184 | *dx &= ~X86_FEATURE_SEP; | ||
185 | if (disable_tsc) | ||
186 | *dx &= ~X86_FEATURE_TSC; | ||
187 | if (disable_mtrr) | ||
188 | *dx &= ~X86_FEATURE_MTRR; | ||
189 | } | ||
190 | } | ||
191 | |||
192 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | ||
193 | { | ||
194 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | ||
195 | write_gdt_entry(gdt, nr, new, 0); | ||
196 | } | ||
197 | |||
198 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | ||
199 | { | ||
200 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
201 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]); | ||
202 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]); | ||
203 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]); | ||
204 | } | ||
205 | |||
206 | static void vmi_set_ldt(const void *addr, unsigned entries) | ||
207 | { | ||
208 | unsigned cpu = smp_processor_id(); | ||
209 | struct desc_struct desc; | ||
210 | |||
211 | pack_descriptor(&desc, (unsigned long)addr, | ||
212 | entries * sizeof(struct desc_struct) - 1, | ||
213 | DESC_LDT, 0); | ||
214 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT); | ||
215 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | ||
216 | } | ||
217 | |||
218 | static void vmi_set_tr(void) | ||
219 | { | ||
220 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | ||
221 | } | ||
222 | |||
223 | static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) | ||
224 | { | ||
225 | u32 *idt_entry = (u32 *)g; | ||
226 | vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]); | ||
227 | } | ||
228 | |||
229 | static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, | ||
230 | const void *desc, int type) | ||
231 | { | ||
232 | u32 *gdt_entry = (u32 *)desc; | ||
233 | vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]); | ||
234 | } | ||
235 | |||
236 | static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | ||
237 | const void *desc) | ||
238 | { | ||
239 | u32 *ldt_entry = (u32 *)desc; | ||
240 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | ||
241 | } | ||
242 | |||
243 | static void vmi_load_sp0(struct tss_struct *tss, | ||
244 | struct thread_struct *thread) | ||
245 | { | ||
246 | tss->x86_tss.sp0 = thread->sp0; | ||
247 | |||
248 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
249 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | ||
250 | tss->x86_tss.ss1 = thread->sysenter_cs; | ||
251 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
252 | } | ||
253 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0); | ||
254 | } | ||
255 | |||
256 | static void vmi_flush_tlb_user(void) | ||
257 | { | ||
258 | vmi_ops._flush_tlb(VMI_FLUSH_TLB); | ||
259 | } | ||
260 | |||
261 | static void vmi_flush_tlb_kernel(void) | ||
262 | { | ||
263 | vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | ||
264 | } | ||
265 | |||
266 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | ||
267 | static void vmi_nop(void) | ||
268 | { | ||
269 | } | ||
270 | |||
271 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | ||
272 | { | ||
273 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | ||
274 | } | ||
275 | |||
276 | static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | ||
277 | { | ||
278 | /* | ||
279 | * This call comes in very early, before mem_map is setup. | ||
280 | * It is called only for swapper_pg_dir, which already has | ||
281 | * data on it. | ||
282 | */ | ||
283 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | ||
284 | } | ||
285 | |||
286 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | ||
287 | { | ||
288 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | ||
289 | } | ||
290 | |||
291 | static void vmi_release_pte(unsigned long pfn) | ||
292 | { | ||
293 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | ||
294 | } | ||
295 | |||
296 | static void vmi_release_pmd(unsigned long pfn) | ||
297 | { | ||
298 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * We use the pgd_free hook for releasing the pgd page: | ||
303 | */ | ||
304 | static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
305 | { | ||
306 | unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; | ||
307 | |||
308 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Helper macros for MMU update flags. We can defer updates until a flush | ||
313 | * or page invalidation only if the update is to the current address space | ||
314 | * (otherwise, there is no flush). We must check against init_mm, since | ||
315 | * this could be a kernel update, which usually passes init_mm, although | ||
316 | * sometimes this check can be skipped if we know the particular function | ||
317 | * is only called on user mode PTEs. We could change the kernel to pass | ||
318 | * current->active_mm here, but in particular, I was unsure if changing | ||
319 | * mm/highmem.c to do this would still be correct on other architectures. | ||
320 | */ | ||
321 | #define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \ | ||
322 | (!mustbeuser && (mm) == &init_mm)) | ||
323 | #define vmi_flags_addr(mm, addr, level, user) \ | ||
324 | ((level) | (is_current_as(mm, user) ? \ | ||
325 | (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
326 | #define vmi_flags_addr_defer(mm, addr, level, user) \ | ||
327 | ((level) | (is_current_as(mm, user) ? \ | ||
328 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
329 | |||
330 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
331 | { | ||
332 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
333 | } | ||
334 | |||
335 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
336 | { | ||
337 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | ||
338 | } | ||
339 | |||
340 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | ||
341 | { | ||
342 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | ||
343 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | ||
344 | } | ||
345 | |||
346 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
347 | { | ||
348 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
349 | } | ||
350 | |||
351 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
352 | { | ||
353 | #ifdef CONFIG_X86_PAE | ||
354 | const pte_t pte = { .pte = pmdval.pmd }; | ||
355 | #else | ||
356 | const pte_t pte = { pmdval.pud.pgd.pgd }; | ||
357 | #endif | ||
358 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | ||
359 | } | ||
360 | |||
361 | #ifdef CONFIG_X86_PAE | ||
362 | |||
363 | static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
364 | { | ||
365 | /* | ||
366 | * XXX This is called from set_pmd_pte, but at both PT | ||
367 | * and PD layers so the VMI_PAGE_PT flag is wrong. But | ||
368 | * it is only called for large page mapping changes, | ||
369 | * the Xen backend, doesn't support large pages, and the | ||
370 | * ESX backend doesn't depend on the flag. | ||
371 | */ | ||
372 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | ||
373 | vmi_ops.update_pte(ptep, VMI_PAGE_PT); | ||
374 | } | ||
375 | |||
376 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | ||
377 | { | ||
378 | /* Um, eww */ | ||
379 | const pte_t pte = { .pte = pudval.pgd.pgd }; | ||
380 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | ||
381 | } | ||
382 | |||
383 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
384 | { | ||
385 | const pte_t pte = { .pte = 0 }; | ||
386 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
387 | } | ||
388 | |||
389 | static void vmi_pmd_clear(pmd_t *pmd) | ||
390 | { | ||
391 | const pte_t pte = { .pte = 0 }; | ||
392 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | ||
393 | } | ||
394 | #endif | ||
395 | |||
396 | #ifdef CONFIG_SMP | ||
397 | static void __devinit | ||
398 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | ||
399 | unsigned long start_esp) | ||
400 | { | ||
401 | struct vmi_ap_state ap; | ||
402 | |||
403 | /* Default everything to zero. This is fine for most GPRs. */ | ||
404 | memset(&ap, 0, sizeof(struct vmi_ap_state)); | ||
405 | |||
406 | ap.gdtr_limit = GDT_SIZE - 1; | ||
407 | ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid); | ||
408 | |||
409 | ap.idtr_limit = IDT_ENTRIES * 8 - 1; | ||
410 | ap.idtr_base = (unsigned long) idt_table; | ||
411 | |||
412 | ap.ldtr = 0; | ||
413 | |||
414 | ap.cs = __KERNEL_CS; | ||
415 | ap.eip = (unsigned long) start_eip; | ||
416 | ap.ss = __KERNEL_DS; | ||
417 | ap.esp = (unsigned long) start_esp; | ||
418 | |||
419 | ap.ds = __USER_DS; | ||
420 | ap.es = __USER_DS; | ||
421 | ap.fs = __KERNEL_PERCPU; | ||
422 | ap.gs = __KERNEL_STACK_CANARY; | ||
423 | |||
424 | ap.eflags = 0; | ||
425 | |||
426 | #ifdef CONFIG_X86_PAE | ||
427 | /* efer should match BSP efer. */ | ||
428 | if (cpu_has_nx) { | ||
429 | unsigned l, h; | ||
430 | rdmsr(MSR_EFER, l, h); | ||
431 | ap.efer = (unsigned long long) h << 32 | l; | ||
432 | } | ||
433 | #endif | ||
434 | |||
435 | ap.cr3 = __pa(swapper_pg_dir); | ||
436 | /* Protected mode, paging, AM, WP, NE, MP. */ | ||
437 | ap.cr0 = 0x80050023; | ||
438 | ap.cr4 = mmu_cr4_features; | ||
439 | vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid); | ||
440 | } | ||
441 | #endif | ||
442 | |||
443 | static void vmi_start_context_switch(struct task_struct *prev) | ||
444 | { | ||
445 | paravirt_start_context_switch(prev); | ||
446 | vmi_ops.set_lazy_mode(2); | ||
447 | } | ||
448 | |||
449 | static void vmi_end_context_switch(struct task_struct *next) | ||
450 | { | ||
451 | vmi_ops.set_lazy_mode(0); | ||
452 | paravirt_end_context_switch(next); | ||
453 | } | ||
454 | |||
455 | static void vmi_enter_lazy_mmu(void) | ||
456 | { | ||
457 | paravirt_enter_lazy_mmu(); | ||
458 | vmi_ops.set_lazy_mode(1); | ||
459 | } | ||
460 | |||
461 | static void vmi_leave_lazy_mmu(void) | ||
462 | { | ||
463 | vmi_ops.set_lazy_mode(0); | ||
464 | paravirt_leave_lazy_mmu(); | ||
465 | } | ||
466 | |||
467 | static inline int __init check_vmi_rom(struct vrom_header *rom) | ||
468 | { | ||
469 | struct pci_header *pci; | ||
470 | struct pnp_header *pnp; | ||
471 | const char *manufacturer = "UNKNOWN"; | ||
472 | const char *product = "UNKNOWN"; | ||
473 | const char *license = "unspecified"; | ||
474 | |||
475 | if (rom->rom_signature != 0xaa55) | ||
476 | return 0; | ||
477 | if (rom->vrom_signature != VMI_SIGNATURE) | ||
478 | return 0; | ||
479 | if (rom->api_version_maj != VMI_API_REV_MAJOR || | ||
480 | rom->api_version_min+1 < VMI_API_REV_MINOR+1) { | ||
481 | printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n", | ||
482 | rom->api_version_maj, | ||
483 | rom->api_version_min); | ||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | /* | ||
488 | * Relying on the VMI_SIGNATURE field is not 100% safe, so check | ||
489 | * the PCI header and device type to make sure this is really a | ||
490 | * VMI device. | ||
491 | */ | ||
492 | if (!rom->pci_header_offs) { | ||
493 | printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n"); | ||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | pci = (struct pci_header *)((char *)rom+rom->pci_header_offs); | ||
498 | if (pci->vendorID != PCI_VENDOR_ID_VMWARE || | ||
499 | pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) { | ||
500 | /* Allow it to run... anyways, but warn */ | ||
501 | printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n"); | ||
502 | } | ||
503 | |||
504 | if (rom->pnp_header_offs) { | ||
505 | pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs); | ||
506 | if (pnp->manufacturer_offset) | ||
507 | manufacturer = (const char *)rom+pnp->manufacturer_offset; | ||
508 | if (pnp->product_offset) | ||
509 | product = (const char *)rom+pnp->product_offset; | ||
510 | } | ||
511 | |||
512 | if (rom->license_offs) | ||
513 | license = (char *)rom+rom->license_offs; | ||
514 | |||
515 | printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n", | ||
516 | manufacturer, product, | ||
517 | rom->api_version_maj, rom->api_version_min, | ||
518 | pci->rom_version_maj, pci->rom_version_min); | ||
519 | |||
520 | /* Don't allow BSD/MIT here for now because we don't want to end up | ||
521 | with any binary only shim layers */ | ||
522 | if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) { | ||
523 | printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n", | ||
524 | license); | ||
525 | return 0; | ||
526 | } | ||
527 | |||
528 | return 1; | ||
529 | } | ||
530 | |||
531 | /* | ||
532 | * Probe for the VMI option ROM | ||
533 | */ | ||
534 | static inline int __init probe_vmi_rom(void) | ||
535 | { | ||
536 | unsigned long base; | ||
537 | |||
538 | /* VMI ROM is in option ROM area, check signature */ | ||
539 | for (base = 0xC0000; base < 0xE0000; base += 2048) { | ||
540 | struct vrom_header *romstart; | ||
541 | romstart = (struct vrom_header *)isa_bus_to_virt(base); | ||
542 | if (check_vmi_rom(romstart)) { | ||
543 | vmi_rom = romstart; | ||
544 | return 1; | ||
545 | } | ||
546 | } | ||
547 | return 0; | ||
548 | } | ||
549 | |||
550 | /* | ||
551 | * VMI setup common to all processors | ||
552 | */ | ||
553 | void vmi_bringup(void) | ||
554 | { | ||
555 | /* We must establish the lowmem mapping for MMU ops to work */ | ||
556 | if (vmi_ops.set_linear_mapping) | ||
557 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Return a pointer to a VMI function or NULL if unimplemented | ||
562 | */ | ||
563 | static void *vmi_get_function(int vmicall) | ||
564 | { | ||
565 | u64 reloc; | ||
566 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
567 | reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall); | ||
568 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); | ||
569 | if (rel->type == VMI_RELOCATION_CALL_REL) | ||
570 | return (void *)rel->eip; | ||
571 | else | ||
572 | return NULL; | ||
573 | } | ||
574 | |||
575 | /* | ||
576 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
577 | * For unimplemented operations, fall back to default, unless nop | ||
578 | * is returned by the ROM. | ||
579 | */ | ||
580 | #define para_fill(opname, vmicall) \ | ||
581 | do { \ | ||
582 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
583 | VMI_CALL_##vmicall); \ | ||
584 | if (rel->type == VMI_RELOCATION_CALL_REL) \ | ||
585 | opname = (void *)rel->eip; \ | ||
586 | else if (rel->type == VMI_RELOCATION_NOP) \ | ||
587 | opname = (void *)vmi_nop; \ | ||
588 | else if (rel->type != VMI_RELOCATION_NONE) \ | ||
589 | printk(KERN_WARNING "VMI: Unknown relocation " \ | ||
590 | "type %d for " #vmicall"\n",\ | ||
591 | rel->type); \ | ||
592 | } while (0) | ||
593 | |||
594 | /* | ||
595 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
596 | * For cached operations which do not match the VMI ROM ABI and must | ||
597 | * go through a tranlation stub. Ignore NOPs, since it is not clear | ||
598 | * a NOP * VMI function corresponds to a NOP paravirt-op when the | ||
599 | * functions are not in 1-1 correspondence. | ||
600 | */ | ||
601 | #define para_wrap(opname, wrapper, cache, vmicall) \ | ||
602 | do { \ | ||
603 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
604 | VMI_CALL_##vmicall); \ | ||
605 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ | ||
606 | if (rel->type == VMI_RELOCATION_CALL_REL) { \ | ||
607 | opname = wrapper; \ | ||
608 | vmi_ops.cache = (void *)rel->eip; \ | ||
609 | } \ | ||
610 | } while (0) | ||
611 | |||
612 | /* | ||
613 | * Activate the VMI interface and switch into paravirtualized mode | ||
614 | */ | ||
615 | static inline int __init activate_vmi(void) | ||
616 | { | ||
617 | short kernel_cs; | ||
618 | u64 reloc; | ||
619 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
620 | |||
621 | /* | ||
622 | * Prevent page tables from being allocated in highmem, even if | ||
623 | * CONFIG_HIGHPTE is enabled. | ||
624 | */ | ||
625 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
626 | |||
627 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | ||
628 | printk(KERN_ERR "VMI ROM failed to initialize!"); | ||
629 | return 0; | ||
630 | } | ||
631 | savesegment(cs, kernel_cs); | ||
632 | |||
633 | pv_info.paravirt_enabled = 1; | ||
634 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | ||
635 | pv_info.name = "vmi [deprecated]"; | ||
636 | |||
637 | pv_init_ops.patch = vmi_patch; | ||
638 | |||
639 | /* | ||
640 | * Many of these operations are ABI compatible with VMI. | ||
641 | * This means we can fill in the paravirt-ops with direct | ||
642 | * pointers into the VMI ROM. If the calling convention for | ||
643 | * these operations changes, this code needs to be updated. | ||
644 | * | ||
645 | * Exceptions | ||
646 | * CPUID paravirt-op uses pointers, not the native ISA | ||
647 | * halt has no VMI equivalent; all VMI halts are "safe" | ||
648 | * no MSR support yet - just trap and emulate. VMI uses the | ||
649 | * same ABI as the native ISA, but Linux wants exceptions | ||
650 | * from bogus MSR read / write handled | ||
651 | * rdpmc is not yet used in Linux | ||
652 | */ | ||
653 | |||
654 | /* CPUID is special, so very special it gets wrapped like a present */ | ||
655 | para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID); | ||
656 | |||
657 | para_fill(pv_cpu_ops.clts, CLTS); | ||
658 | para_fill(pv_cpu_ops.get_debugreg, GetDR); | ||
659 | para_fill(pv_cpu_ops.set_debugreg, SetDR); | ||
660 | para_fill(pv_cpu_ops.read_cr0, GetCR0); | ||
661 | para_fill(pv_mmu_ops.read_cr2, GetCR2); | ||
662 | para_fill(pv_mmu_ops.read_cr3, GetCR3); | ||
663 | para_fill(pv_cpu_ops.read_cr4, GetCR4); | ||
664 | para_fill(pv_cpu_ops.write_cr0, SetCR0); | ||
665 | para_fill(pv_mmu_ops.write_cr2, SetCR2); | ||
666 | para_fill(pv_mmu_ops.write_cr3, SetCR3); | ||
667 | para_fill(pv_cpu_ops.write_cr4, SetCR4); | ||
668 | |||
669 | para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); | ||
670 | para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); | ||
671 | para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); | ||
672 | para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); | ||
673 | |||
674 | para_fill(pv_cpu_ops.wbinvd, WBINVD); | ||
675 | para_fill(pv_cpu_ops.read_tsc, RDTSC); | ||
676 | |||
677 | /* The following we emulate with trap and emulate for now */ | ||
678 | /* paravirt_ops.read_msr = vmi_rdmsr */ | ||
679 | /* paravirt_ops.write_msr = vmi_wrmsr */ | ||
680 | /* paravirt_ops.rdpmc = vmi_rdpmc */ | ||
681 | |||
682 | /* TR interface doesn't pass TR value, wrap */ | ||
683 | para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR); | ||
684 | |||
685 | /* LDT is special, too */ | ||
686 | para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT); | ||
687 | |||
688 | para_fill(pv_cpu_ops.load_gdt, SetGDT); | ||
689 | para_fill(pv_cpu_ops.load_idt, SetIDT); | ||
690 | para_fill(pv_cpu_ops.store_gdt, GetGDT); | ||
691 | para_fill(pv_cpu_ops.store_idt, GetIDT); | ||
692 | para_fill(pv_cpu_ops.store_tr, GetTR); | ||
693 | pv_cpu_ops.load_tls = vmi_load_tls; | ||
694 | para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry, | ||
695 | write_ldt_entry, WriteLDTEntry); | ||
696 | para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry, | ||
697 | write_gdt_entry, WriteGDTEntry); | ||
698 | para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry, | ||
699 | write_idt_entry, WriteIDTEntry); | ||
700 | para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack); | ||
701 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | ||
702 | para_fill(pv_cpu_ops.io_delay, IODelay); | ||
703 | |||
704 | para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, | ||
705 | set_lazy_mode, SetLazyMode); | ||
706 | para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, | ||
707 | set_lazy_mode, SetLazyMode); | ||
708 | |||
709 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, | ||
710 | set_lazy_mode, SetLazyMode); | ||
711 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, | ||
712 | set_lazy_mode, SetLazyMode); | ||
713 | |||
714 | /* user and kernel flush are just handled with different flags to FlushTLB */ | ||
715 | para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); | ||
716 | para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); | ||
717 | para_fill(pv_mmu_ops.flush_tlb_single, InvalPage); | ||
718 | |||
719 | /* | ||
720 | * Until a standard flag format can be agreed on, we need to | ||
721 | * implement these as wrappers in Linux. Get the VMI ROM | ||
722 | * function pointers for the two backend calls. | ||
723 | */ | ||
724 | #ifdef CONFIG_X86_PAE | ||
725 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong); | ||
726 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong); | ||
727 | #else | ||
728 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); | ||
729 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); | ||
730 | #endif | ||
731 | |||
732 | if (vmi_ops.set_pte) { | ||
733 | pv_mmu_ops.set_pte = vmi_set_pte; | ||
734 | pv_mmu_ops.set_pte_at = vmi_set_pte_at; | ||
735 | pv_mmu_ops.set_pmd = vmi_set_pmd; | ||
736 | #ifdef CONFIG_X86_PAE | ||
737 | pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; | ||
738 | pv_mmu_ops.set_pud = vmi_set_pud; | ||
739 | pv_mmu_ops.pte_clear = vmi_pte_clear; | ||
740 | pv_mmu_ops.pmd_clear = vmi_pmd_clear; | ||
741 | #endif | ||
742 | } | ||
743 | |||
744 | if (vmi_ops.update_pte) { | ||
745 | pv_mmu_ops.pte_update = vmi_update_pte; | ||
746 | pv_mmu_ops.pte_update_defer = vmi_update_pte_defer; | ||
747 | } | ||
748 | |||
749 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
750 | if (vmi_ops.allocate_page) { | ||
751 | pv_mmu_ops.alloc_pte = vmi_allocate_pte; | ||
752 | pv_mmu_ops.alloc_pmd = vmi_allocate_pmd; | ||
753 | pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone; | ||
754 | } | ||
755 | |||
756 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
757 | if (vmi_ops.release_page) { | ||
758 | pv_mmu_ops.release_pte = vmi_release_pte; | ||
759 | pv_mmu_ops.release_pmd = vmi_release_pmd; | ||
760 | pv_mmu_ops.pgd_free = vmi_pgd_free; | ||
761 | } | ||
762 | |||
763 | /* Set linear is needed in all cases */ | ||
764 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
765 | |||
766 | /* | ||
767 | * These MUST always be patched. Don't support indirect jumps | ||
768 | * through these operations, as the VMI interface may use either | ||
769 | * a jump or a call to get to these operations, depending on | ||
770 | * the backend. They are performance critical anyway, so requiring | ||
771 | * a patch is not a big problem. | ||
772 | */ | ||
773 | pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; | ||
774 | pv_cpu_ops.iret = (void *)0xbadbab0; | ||
775 | |||
776 | #ifdef CONFIG_SMP | ||
777 | para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); | ||
778 | #endif | ||
779 | |||
780 | #ifdef CONFIG_X86_LOCAL_APIC | ||
781 | para_fill(apic->read, APICRead); | ||
782 | para_fill(apic->write, APICWrite); | ||
783 | #endif | ||
784 | |||
785 | /* | ||
786 | * Check for VMI timer functionality by probing for a cycle frequency method | ||
787 | */ | ||
788 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); | ||
789 | if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) { | ||
790 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; | ||
791 | vmi_timer_ops.get_cycle_counter = | ||
792 | vmi_get_function(VMI_CALL_GetCycleCounter); | ||
793 | vmi_timer_ops.get_wallclock = | ||
794 | vmi_get_function(VMI_CALL_GetWallclockTime); | ||
795 | vmi_timer_ops.wallclock_updated = | ||
796 | vmi_get_function(VMI_CALL_WallclockUpdated); | ||
797 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | ||
798 | vmi_timer_ops.cancel_alarm = | ||
799 | vmi_get_function(VMI_CALL_CancelAlarm); | ||
800 | x86_init.timers.timer_init = vmi_time_init; | ||
801 | #ifdef CONFIG_X86_LOCAL_APIC | ||
802 | x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init; | ||
803 | x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; | ||
804 | #endif | ||
805 | pv_time_ops.sched_clock = vmi_sched_clock; | ||
806 | x86_platform.calibrate_tsc = vmi_tsc_khz; | ||
807 | x86_platform.get_wallclock = vmi_get_wallclock; | ||
808 | x86_platform.set_wallclock = vmi_set_wallclock; | ||
809 | |||
810 | /* We have true wallclock functions; disable CMOS clock sync */ | ||
811 | no_sync_cmos_clock = 1; | ||
812 | } else { | ||
813 | disable_noidle = 1; | ||
814 | disable_vmi_timer = 1; | ||
815 | } | ||
816 | |||
817 | para_fill(pv_irq_ops.safe_halt, Halt); | ||
818 | |||
819 | /* | ||
820 | * Alternative instruction rewriting doesn't happen soon enough | ||
821 | * to convert VMI_IRET to a call instead of a jump; so we have | ||
822 | * to do this before IRQs get reenabled. Fortunately, it is | ||
823 | * idempotent. | ||
824 | */ | ||
825 | apply_paravirt(__parainstructions, __parainstructions_end); | ||
826 | |||
827 | vmi_bringup(); | ||
828 | |||
829 | return 1; | ||
830 | } | ||
831 | |||
832 | #undef para_fill | ||
833 | |||
834 | void __init vmi_init(void) | ||
835 | { | ||
836 | if (!vmi_rom) | ||
837 | probe_vmi_rom(); | ||
838 | else | ||
839 | check_vmi_rom(vmi_rom); | ||
840 | |||
841 | /* In case probing for or validating the ROM failed, basil */ | ||
842 | if (!vmi_rom) | ||
843 | return; | ||
844 | |||
845 | reserve_top_address(-vmi_rom->virtual_top); | ||
846 | |||
847 | #ifdef CONFIG_X86_IO_APIC | ||
848 | /* This is virtual hardware; timer routing is wired correctly */ | ||
849 | no_timer_check = 1; | ||
850 | #endif | ||
851 | } | ||
852 | |||
853 | void __init vmi_activate(void) | ||
854 | { | ||
855 | unsigned long flags; | ||
856 | |||
857 | if (!vmi_rom) | ||
858 | return; | ||
859 | |||
860 | local_irq_save(flags); | ||
861 | activate_vmi(); | ||
862 | local_irq_restore(flags & X86_EFLAGS_IF); | ||
863 | } | ||
864 | |||
865 | static int __init parse_vmi(char *arg) | ||
866 | { | ||
867 | if (!arg) | ||
868 | return -EINVAL; | ||
869 | |||
870 | if (!strcmp(arg, "disable_pge")) { | ||
871 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); | ||
872 | disable_pge = 1; | ||
873 | } else if (!strcmp(arg, "disable_pse")) { | ||
874 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE); | ||
875 | disable_pse = 1; | ||
876 | } else if (!strcmp(arg, "disable_sep")) { | ||
877 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); | ||
878 | disable_sep = 1; | ||
879 | } else if (!strcmp(arg, "disable_tsc")) { | ||
880 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC); | ||
881 | disable_tsc = 1; | ||
882 | } else if (!strcmp(arg, "disable_mtrr")) { | ||
883 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR); | ||
884 | disable_mtrr = 1; | ||
885 | } else if (!strcmp(arg, "disable_timer")) { | ||
886 | disable_vmi_timer = 1; | ||
887 | disable_noidle = 1; | ||
888 | } else if (!strcmp(arg, "disable_noidle")) | ||
889 | disable_noidle = 1; | ||
890 | return 0; | ||
891 | } | ||
892 | |||
893 | early_param("vmi", parse_vmi); | ||
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c deleted file mode 100644 index 5e1ff66ecd73..000000000000 --- a/arch/x86/kernel/vmiclock_32.c +++ /dev/null | |||
@@ -1,317 +0,0 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2007, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/smp.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/cpumask.h> | ||
26 | #include <linux/clocksource.h> | ||
27 | #include <linux/clockchips.h> | ||
28 | |||
29 | #include <asm/vmi.h> | ||
30 | #include <asm/vmi_time.h> | ||
31 | #include <asm/apicdef.h> | ||
32 | #include <asm/apic.h> | ||
33 | #include <asm/timer.h> | ||
34 | #include <asm/i8253.h> | ||
35 | #include <asm/irq_vectors.h> | ||
36 | |||
37 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
38 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
39 | |||
40 | static DEFINE_PER_CPU(struct clock_event_device, local_events); | ||
41 | |||
42 | static inline u32 vmi_counter(u32 flags) | ||
43 | { | ||
44 | /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding | ||
45 | * cycle counter. */ | ||
46 | return flags & VMI_ALARM_COUNTER_MASK; | ||
47 | } | ||
48 | |||
49 | /* paravirt_ops.get_wallclock = vmi_get_wallclock */ | ||
50 | unsigned long vmi_get_wallclock(void) | ||
51 | { | ||
52 | unsigned long long wallclock; | ||
53 | wallclock = vmi_timer_ops.get_wallclock(); // nsec | ||
54 | (void)do_div(wallclock, 1000000000); // sec | ||
55 | |||
56 | return wallclock; | ||
57 | } | ||
58 | |||
59 | /* paravirt_ops.set_wallclock = vmi_set_wallclock */ | ||
60 | int vmi_set_wallclock(unsigned long now) | ||
61 | { | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | /* paravirt_ops.sched_clock = vmi_sched_clock */ | ||
66 | unsigned long long vmi_sched_clock(void) | ||
67 | { | ||
68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); | ||
69 | } | ||
70 | |||
71 | /* x86_platform.calibrate_tsc = vmi_tsc_khz */ | ||
72 | unsigned long vmi_tsc_khz(void) | ||
73 | { | ||
74 | unsigned long long khz; | ||
75 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
76 | (void)do_div(khz, 1000); | ||
77 | return khz; | ||
78 | } | ||
79 | |||
80 | static inline unsigned int vmi_get_timer_vector(void) | ||
81 | { | ||
82 | return IRQ0_VECTOR; | ||
83 | } | ||
84 | |||
85 | /** vmi clockchip */ | ||
86 | #ifdef CONFIG_X86_LOCAL_APIC | ||
87 | static unsigned int startup_timer_irq(unsigned int irq) | ||
88 | { | ||
89 | unsigned long val = apic_read(APIC_LVTT); | ||
90 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
91 | |||
92 | return (val & APIC_SEND_PENDING); | ||
93 | } | ||
94 | |||
95 | static void mask_timer_irq(unsigned int irq) | ||
96 | { | ||
97 | unsigned long val = apic_read(APIC_LVTT); | ||
98 | apic_write(APIC_LVTT, val | APIC_LVT_MASKED); | ||
99 | } | ||
100 | |||
101 | static void unmask_timer_irq(unsigned int irq) | ||
102 | { | ||
103 | unsigned long val = apic_read(APIC_LVTT); | ||
104 | apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED); | ||
105 | } | ||
106 | |||
107 | static void ack_timer_irq(unsigned int irq) | ||
108 | { | ||
109 | ack_APIC_irq(); | ||
110 | } | ||
111 | |||
112 | static struct irq_chip vmi_chip __read_mostly = { | ||
113 | .name = "VMI-LOCAL", | ||
114 | .startup = startup_timer_irq, | ||
115 | .mask = mask_timer_irq, | ||
116 | .unmask = unmask_timer_irq, | ||
117 | .ack = ack_timer_irq | ||
118 | }; | ||
119 | #endif | ||
120 | |||
121 | /** vmi clockevent */ | ||
122 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
123 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
124 | static int vmi_wiring = VMI_ALARM_WIRED_IRQ0; | ||
125 | |||
126 | static inline int vmi_get_alarm_wiring(void) | ||
127 | { | ||
128 | return vmi_wiring; | ||
129 | } | ||
130 | |||
131 | static void vmi_timer_set_mode(enum clock_event_mode mode, | ||
132 | struct clock_event_device *evt) | ||
133 | { | ||
134 | cycle_t now, cycles_per_hz; | ||
135 | BUG_ON(!irqs_disabled()); | ||
136 | |||
137 | switch (mode) { | ||
138 | case CLOCK_EVT_MODE_ONESHOT: | ||
139 | case CLOCK_EVT_MODE_RESUME: | ||
140 | break; | ||
141 | case CLOCK_EVT_MODE_PERIODIC: | ||
142 | cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); | ||
143 | (void)do_div(cycles_per_hz, HZ); | ||
144 | now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC)); | ||
145 | vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz); | ||
146 | break; | ||
147 | case CLOCK_EVT_MODE_UNUSED: | ||
148 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
149 | switch (evt->mode) { | ||
150 | case CLOCK_EVT_MODE_ONESHOT: | ||
151 | vmi_timer_ops.cancel_alarm(VMI_ONESHOT); | ||
152 | break; | ||
153 | case CLOCK_EVT_MODE_PERIODIC: | ||
154 | vmi_timer_ops.cancel_alarm(VMI_PERIODIC); | ||
155 | break; | ||
156 | default: | ||
157 | break; | ||
158 | } | ||
159 | break; | ||
160 | default: | ||
161 | break; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | static int vmi_timer_next_event(unsigned long delta, | ||
166 | struct clock_event_device *evt) | ||
167 | { | ||
168 | /* Unfortunately, set_next_event interface only passes relative | ||
169 | * expiry, but we want absolute expiry. It'd be better if were | ||
170 | * were passed an absolute expiry, since a bunch of time may | ||
171 | * have been stolen between the time the delta is computed and | ||
172 | * when we set the alarm below. */ | ||
173 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | ||
174 | |||
175 | BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
176 | vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | static struct clock_event_device vmi_clockevent = { | ||
181 | .name = "vmi-timer", | ||
182 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
183 | .shift = 22, | ||
184 | .set_mode = vmi_timer_set_mode, | ||
185 | .set_next_event = vmi_timer_next_event, | ||
186 | .rating = 1000, | ||
187 | .irq = 0, | ||
188 | }; | ||
189 | |||
190 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
191 | { | ||
192 | struct clock_event_device *evt = &__get_cpu_var(local_events); | ||
193 | evt->event_handler(evt); | ||
194 | return IRQ_HANDLED; | ||
195 | } | ||
196 | |||
197 | static struct irqaction vmi_clock_action = { | ||
198 | .name = "vmi-timer", | ||
199 | .handler = vmi_timer_interrupt, | ||
200 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, | ||
201 | }; | ||
202 | |||
203 | static void __devinit vmi_time_init_clockevent(void) | ||
204 | { | ||
205 | cycle_t cycles_per_msec; | ||
206 | struct clock_event_device *evt; | ||
207 | |||
208 | int cpu = smp_processor_id(); | ||
209 | evt = &__get_cpu_var(local_events); | ||
210 | |||
211 | /* Use cycles_per_msec since div_sc params are 32-bits. */ | ||
212 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
213 | (void)do_div(cycles_per_msec, 1000); | ||
214 | |||
215 | memcpy(evt, &vmi_clockevent, sizeof(*evt)); | ||
216 | /* Must pick .shift such that .mult fits in 32-bits. Choosing | ||
217 | * .shift to be 22 allows 2^(32-22) cycles per nano-seconds | ||
218 | * before overflow. */ | ||
219 | evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift); | ||
220 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | ||
221 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | ||
222 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | ||
223 | evt->cpumask = cpumask_of(cpu); | ||
224 | |||
225 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", | ||
226 | evt->name, evt->mult, evt->shift); | ||
227 | clockevents_register_device(evt); | ||
228 | } | ||
229 | |||
230 | void __init vmi_time_init(void) | ||
231 | { | ||
232 | unsigned int cpu; | ||
233 | /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ | ||
234 | outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
235 | |||
236 | vmi_time_init_clockevent(); | ||
237 | setup_irq(0, &vmi_clock_action); | ||
238 | for_each_possible_cpu(cpu) | ||
239 | per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; | ||
240 | } | ||
241 | |||
242 | #ifdef CONFIG_X86_LOCAL_APIC | ||
243 | void __devinit vmi_time_bsp_init(void) | ||
244 | { | ||
245 | /* | ||
246 | * On APIC systems, we want local timers to fire on each cpu. We do | ||
247 | * this by programming LVTT to deliver timer events to the IRQ handler | ||
248 | * for IRQ-0, since we can't re-use the APIC local timer handler | ||
249 | * without interfering with that code. | ||
250 | */ | ||
251 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | ||
252 | local_irq_disable(); | ||
253 | #ifdef CONFIG_SMP | ||
254 | /* | ||
255 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | ||
256 | * to using it, since this is a local interrupt, which each CPU must | ||
257 | * handle individually without locking out or dropping simultaneous | ||
258 | * local timers on other CPUs. We also don't want to trigger the | ||
259 | * quirk workaround code for interrupts which gets invoked from | ||
260 | * handle_percpu_irq via eoi, so we use our own IRQ chip. | ||
261 | */ | ||
262 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt"); | ||
263 | #else | ||
264 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt"); | ||
265 | #endif | ||
266 | vmi_wiring = VMI_ALARM_WIRED_LVTT; | ||
267 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
268 | local_irq_enable(); | ||
269 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | ||
270 | } | ||
271 | |||
272 | void __devinit vmi_time_ap_init(void) | ||
273 | { | ||
274 | vmi_time_init_clockevent(); | ||
275 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
276 | } | ||
277 | #endif | ||
278 | |||
279 | /** vmi clocksource */ | ||
280 | static struct clocksource clocksource_vmi; | ||
281 | |||
282 | static cycle_t read_real_cycles(struct clocksource *cs) | ||
283 | { | ||
284 | cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
285 | return max(ret, clocksource_vmi.cycle_last); | ||
286 | } | ||
287 | |||
288 | static struct clocksource clocksource_vmi = { | ||
289 | .name = "vmi-timer", | ||
290 | .rating = 450, | ||
291 | .read = read_real_cycles, | ||
292 | .mask = CLOCKSOURCE_MASK(64), | ||
293 | .mult = 0, /* to be set */ | ||
294 | .shift = 22, | ||
295 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
296 | }; | ||
297 | |||
298 | static int __init init_vmi_clocksource(void) | ||
299 | { | ||
300 | cycle_t cycles_per_msec; | ||
301 | |||
302 | if (!vmi_timer_ops.get_cycle_frequency) | ||
303 | return 0; | ||
304 | /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */ | ||
305 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
306 | (void)do_div(cycles_per_msec, 1000); | ||
307 | |||
308 | /* Note that clocksource.{mult, shift} converts in the opposite direction | ||
309 | * as clockevents. */ | ||
310 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
311 | clocksource_vmi.shift); | ||
312 | |||
313 | printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec); | ||
314 | return clocksource_register(&clocksource_vmi); | ||
315 | |||
316 | } | ||
317 | module_init(init_vmi_clocksource); | ||