diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 56 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 77 | ||||
-rw-r--r-- | arch/x86/xen/mmu.h | 24 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 145 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 6 |
6 files changed, 80 insertions, 231 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be2737e..6c388e593bc8 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -5,8 +5,9 @@ | |||
5 | config XEN | 5 | config XEN |
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | select PARAVIRT_CLOCK | ||
8 | depends on X86_32 | 9 | depends on X86_32 |
9 | depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) | 10 | depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) |
10 | help | 11 | help |
11 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
12 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d61..f09c1c69c37a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
785 | static __init void xen_pagetable_setup_start(pgd_t *base) | 785 | static __init void xen_pagetable_setup_start(pgd_t *base) |
786 | { | 786 | { |
787 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; | 787 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; |
788 | int i; | ||
788 | 789 | ||
789 | /* special set_pte for pagetable initialization */ | 790 | /* special set_pte for pagetable initialization */ |
790 | pv_mmu_ops.set_pte = xen_set_pte_init; | 791 | pv_mmu_ops.set_pte = xen_set_pte_init; |
791 | 792 | ||
792 | init_mm.pgd = base; | 793 | init_mm.pgd = base; |
793 | /* | 794 | /* |
794 | * copy top-level of Xen-supplied pagetable into place. For | 795 | * copy top-level of Xen-supplied pagetable into place. This |
795 | * !PAE we can use this as-is, but for PAE it is a stand-in | 796 | * is a stand-in while we copy the pmd pages. |
796 | * while we copy the pmd pages. | ||
797 | */ | 797 | */ |
798 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); | 798 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); |
799 | 799 | ||
800 | if (PTRS_PER_PMD > 1) { | 800 | /* |
801 | int i; | 801 | * For PAE, need to allocate new pmds, rather than |
802 | /* | 802 | * share Xen's, since Xen doesn't like pmd's being |
803 | * For PAE, need to allocate new pmds, rather than | 803 | * shared between address spaces. |
804 | * share Xen's, since Xen doesn't like pmd's being | 804 | */ |
805 | * shared between address spaces. | 805 | for (i = 0; i < PTRS_PER_PGD; i++) { |
806 | */ | 806 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { |
807 | for (i = 0; i < PTRS_PER_PGD; i++) { | 807 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
808 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { | ||
809 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | ||
810 | 808 | ||
811 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), | 809 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), |
812 | PAGE_SIZE); | 810 | PAGE_SIZE); |
813 | 811 | ||
814 | make_lowmem_page_readonly(pmd); | 812 | make_lowmem_page_readonly(pmd); |
815 | 813 | ||
816 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); | 814 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); |
817 | } else | 815 | } else |
818 | pgd_clear(&base[i]); | 816 | pgd_clear(&base[i]); |
819 | } | ||
820 | } | 817 | } |
821 | 818 | ||
822 | /* make sure zero_page is mapped RO so we can use it in pagetables */ | 819 | /* make sure zero_page is mapped RO so we can use it in pagetables */ |
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
873 | 870 | ||
874 | /* Actually pin the pagetable down, but we can't set PG_pinned | 871 | /* Actually pin the pagetable down, but we can't set PG_pinned |
875 | yet because the page structures don't exist yet. */ | 872 | yet because the page structures don't exist yet. */ |
876 | { | 873 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); |
877 | unsigned level; | ||
878 | |||
879 | #ifdef CONFIG_X86_PAE | ||
880 | level = MMUEXT_PIN_L3_TABLE; | ||
881 | #else | ||
882 | level = MMUEXT_PIN_L2_TABLE; | ||
883 | #endif | ||
884 | |||
885 | pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); | ||
886 | } | ||
887 | } | 874 | } |
888 | 875 | ||
889 | /* This is called once we have the cpu_possible_map */ | 876 | /* This is called once we have the cpu_possible_map */ |
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1093 | .make_pte = xen_make_pte, | 1080 | .make_pte = xen_make_pte, |
1094 | .make_pgd = xen_make_pgd, | 1081 | .make_pgd = xen_make_pgd, |
1095 | 1082 | ||
1096 | #ifdef CONFIG_X86_PAE | ||
1097 | .set_pte_atomic = xen_set_pte_atomic, | 1083 | .set_pte_atomic = xen_set_pte_atomic, |
1098 | .set_pte_present = xen_set_pte_at, | 1084 | .set_pte_present = xen_set_pte_at, |
1099 | .set_pud = xen_set_pud, | 1085 | .set_pud = xen_set_pud, |
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1102 | 1088 | ||
1103 | .make_pmd = xen_make_pmd, | 1089 | .make_pmd = xen_make_pmd, |
1104 | .pmd_val = xen_pmd_val, | 1090 | .pmd_val = xen_pmd_val, |
1105 | #endif /* PAE */ | ||
1106 | 1091 | ||
1107 | .activate_mm = xen_activate_mm, | 1092 | .activate_mm = xen_activate_mm, |
1108 | .dup_mmap = xen_dup_mmap, | 1093 | .dup_mmap = xen_dup_mmap, |
@@ -1228,6 +1213,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1228 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1213 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1229 | pv_info.kernel_rpl = 0; | 1214 | pv_info.kernel_rpl = 0; |
1230 | 1215 | ||
1216 | /* Prevent unwanted bits from being set in PTEs. */ | ||
1217 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1218 | if (!is_initial_xendomain()) | ||
1219 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1220 | |||
1231 | /* set the limit of our address space */ | 1221 | /* set the limit of our address space */ |
1232 | xen_reserve_top(); | 1222 | xen_reserve_top(); |
1233 | 1223 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 126766d43aea..df40bf74ea75 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -60,7 +60,7 @@ xmaddr_t arbitrary_virt_to_machine(unsigned long address) | |||
60 | { | 60 | { |
61 | unsigned int level; | 61 | unsigned int level; |
62 | pte_t *pte = lookup_address(address, &level); | 62 | pte_t *pte = lookup_address(address, &level); |
63 | unsigned offset = address & PAGE_MASK; | 63 | unsigned offset = address & ~PAGE_MASK; |
64 | 64 | ||
65 | BUG_ON(pte == NULL); | 65 | BUG_ON(pte == NULL); |
66 | 66 | ||
@@ -179,50 +179,56 @@ out: | |||
179 | preempt_enable(); | 179 | preempt_enable(); |
180 | } | 180 | } |
181 | 181 | ||
182 | pteval_t xen_pte_val(pte_t pte) | 182 | /* Assume pteval_t is equivalent to all the other *val_t types. */ |
183 | static pteval_t pte_mfn_to_pfn(pteval_t val) | ||
184 | { | ||
185 | if (val & _PAGE_PRESENT) { | ||
186 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; | ||
187 | pteval_t flags = val & ~PTE_MASK; | ||
188 | val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; | ||
189 | } | ||
190 | |||
191 | return val; | ||
192 | } | ||
193 | |||
194 | static pteval_t pte_pfn_to_mfn(pteval_t val) | ||
183 | { | 195 | { |
184 | pteval_t ret = pte.pte; | 196 | if (val & _PAGE_PRESENT) { |
197 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; | ||
198 | pteval_t flags = val & ~PTE_MASK; | ||
199 | val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | ||
200 | } | ||
185 | 201 | ||
186 | if (ret & _PAGE_PRESENT) | 202 | return val; |
187 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | 203 | } |
188 | 204 | ||
189 | return ret; | 205 | pteval_t xen_pte_val(pte_t pte) |
206 | { | ||
207 | return pte_mfn_to_pfn(pte.pte); | ||
190 | } | 208 | } |
191 | 209 | ||
192 | pgdval_t xen_pgd_val(pgd_t pgd) | 210 | pgdval_t xen_pgd_val(pgd_t pgd) |
193 | { | 211 | { |
194 | pgdval_t ret = pgd.pgd; | 212 | return pte_mfn_to_pfn(pgd.pgd); |
195 | if (ret & _PAGE_PRESENT) | ||
196 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
197 | return ret; | ||
198 | } | 213 | } |
199 | 214 | ||
200 | pte_t xen_make_pte(pteval_t pte) | 215 | pte_t xen_make_pte(pteval_t pte) |
201 | { | 216 | { |
202 | if (pte & _PAGE_PRESENT) { | 217 | pte = pte_pfn_to_mfn(pte); |
203 | pte = phys_to_machine(XPADDR(pte)).maddr; | 218 | return native_make_pte(pte); |
204 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | ||
205 | } | ||
206 | |||
207 | return (pte_t){ .pte = pte }; | ||
208 | } | 219 | } |
209 | 220 | ||
210 | pgd_t xen_make_pgd(pgdval_t pgd) | 221 | pgd_t xen_make_pgd(pgdval_t pgd) |
211 | { | 222 | { |
212 | if (pgd & _PAGE_PRESENT) | 223 | pgd = pte_pfn_to_mfn(pgd); |
213 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | 224 | return native_make_pgd(pgd); |
214 | |||
215 | return (pgd_t){ pgd }; | ||
216 | } | 225 | } |
217 | 226 | ||
218 | pmdval_t xen_pmd_val(pmd_t pmd) | 227 | pmdval_t xen_pmd_val(pmd_t pmd) |
219 | { | 228 | { |
220 | pmdval_t ret = native_pmd_val(pmd); | 229 | return pte_mfn_to_pfn(pmd.pmd); |
221 | if (ret & _PAGE_PRESENT) | ||
222 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
223 | return ret; | ||
224 | } | 230 | } |
225 | #ifdef CONFIG_X86_PAE | 231 | |
226 | void xen_set_pud(pud_t *ptr, pud_t val) | 232 | void xen_set_pud(pud_t *ptr, pud_t val) |
227 | { | 233 | { |
228 | struct multicall_space mcs; | 234 | struct multicall_space mcs; |
@@ -267,17 +273,9 @@ void xen_pmd_clear(pmd_t *pmdp) | |||
267 | 273 | ||
268 | pmd_t xen_make_pmd(pmdval_t pmd) | 274 | pmd_t xen_make_pmd(pmdval_t pmd) |
269 | { | 275 | { |
270 | if (pmd & _PAGE_PRESENT) | 276 | pmd = pte_pfn_to_mfn(pmd); |
271 | pmd = phys_to_machine(XPADDR(pmd)).maddr; | ||
272 | |||
273 | return native_make_pmd(pmd); | 277 | return native_make_pmd(pmd); |
274 | } | 278 | } |
275 | #else /* !PAE */ | ||
276 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
277 | { | ||
278 | *ptep = pte; | ||
279 | } | ||
280 | #endif /* CONFIG_X86_PAE */ | ||
281 | 279 | ||
282 | /* | 280 | /* |
283 | (Yet another) pagetable walker. This one is intended for pinning a | 281 | (Yet another) pagetable walker. This one is intended for pinning a |
@@ -430,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level) | |||
430 | read-only, and can be pinned. */ | 428 | read-only, and can be pinned. */ |
431 | void xen_pgd_pin(pgd_t *pgd) | 429 | void xen_pgd_pin(pgd_t *pgd) |
432 | { | 430 | { |
433 | unsigned level; | ||
434 | |||
435 | xen_mc_batch(); | 431 | xen_mc_batch(); |
436 | 432 | ||
437 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { | 433 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { |
@@ -441,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd) | |||
441 | xen_mc_batch(); | 437 | xen_mc_batch(); |
442 | } | 438 | } |
443 | 439 | ||
444 | #ifdef CONFIG_X86_PAE | 440 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
445 | level = MMUEXT_PIN_L3_TABLE; | ||
446 | #else | ||
447 | level = MMUEXT_PIN_L2_TABLE; | ||
448 | #endif | ||
449 | |||
450 | xen_do_pin(level, PFN_DOWN(__pa(pgd))); | ||
451 | |||
452 | xen_mc_issue(0); | 441 | xen_mc_issue(0); |
453 | } | 442 | } |
454 | 443 | ||
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b1519d..5fe961caffd4 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm); | |||
37 | void xen_pgd_pin(pgd_t *pgd); | 37 | void xen_pgd_pin(pgd_t *pgd); |
38 | //void xen_pgd_unpin(pgd_t *pgd); | 38 | //void xen_pgd_unpin(pgd_t *pgd); |
39 | 39 | ||
40 | #ifdef CONFIG_X86_PAE | 40 | pteval_t xen_pte_val(pte_t); |
41 | unsigned long long xen_pte_val(pte_t); | 41 | pmdval_t xen_pmd_val(pmd_t); |
42 | unsigned long long xen_pmd_val(pmd_t); | 42 | pgdval_t xen_pgd_val(pgd_t); |
43 | unsigned long long xen_pgd_val(pgd_t); | ||
44 | 43 | ||
45 | pte_t xen_make_pte(unsigned long long); | 44 | pte_t xen_make_pte(pteval_t); |
46 | pmd_t xen_make_pmd(unsigned long long); | 45 | pmd_t xen_make_pmd(pmdval_t); |
47 | pgd_t xen_make_pgd(unsigned long long); | 46 | pgd_t xen_make_pgd(pgdval_t); |
48 | 47 | ||
49 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 48 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
50 | pte_t *ptep, pte_t pteval); | 49 | pte_t *ptep, pte_t pteval); |
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val); | |||
53 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
54 | void xen_pmd_clear(pmd_t *pmdp); | 53 | void xen_pmd_clear(pmd_t *pmdp); |
55 | 54 | ||
56 | |||
57 | #else | ||
58 | unsigned long xen_pte_val(pte_t); | ||
59 | unsigned long xen_pmd_val(pmd_t); | ||
60 | unsigned long xen_pgd_val(pgd_t); | ||
61 | |||
62 | pte_t xen_make_pte(unsigned long); | ||
63 | pmd_t xen_make_pmd(unsigned long); | ||
64 | pgd_t xen_make_pgd(unsigned long); | ||
65 | #endif | ||
66 | |||
67 | #endif /* _XEN_MMU_H */ | 55 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..41e217503c96 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -12,7 +12,9 @@ | |||
12 | #include <linux/clocksource.h> | 12 | #include <linux/clocksource.h> |
13 | #include <linux/clockchips.h> | 13 | #include <linux/clockchips.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/math64.h> | ||
15 | 16 | ||
17 | #include <asm/pvclock.h> | ||
16 | #include <asm/xen/hypervisor.h> | 18 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 19 | #include <asm/xen/hypercall.h> |
18 | 20 | ||
@@ -30,17 +32,6 @@ | |||
30 | 32 | ||
31 | static cycle_t xen_clocksource_read(void); | 33 | static cycle_t xen_clocksource_read(void); |
32 | 34 | ||
33 | /* These are periodically updated in shared_info, and then copied here. */ | ||
34 | struct shadow_time_info { | ||
35 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
36 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
37 | u32 tsc_to_nsec_mul; | ||
38 | int tsc_shift; | ||
39 | u32 version; | ||
40 | }; | ||
41 | |||
42 | static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); | ||
43 | |||
44 | /* runstate info updated by Xen */ | 35 | /* runstate info updated by Xen */ |
45 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
46 | 37 | ||
@@ -150,11 +141,7 @@ static void do_stolen_accounting(void) | |||
150 | if (stolen < 0) | 141 | if (stolen < 0) |
151 | stolen = 0; | 142 | stolen = 0; |
152 | 143 | ||
153 | ticks = 0; | 144 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
154 | while (stolen >= NS_PER_TICK) { | ||
155 | ticks++; | ||
156 | stolen -= NS_PER_TICK; | ||
157 | } | ||
158 | __get_cpu_var(residual_stolen) = stolen; | 145 | __get_cpu_var(residual_stolen) = stolen; |
159 | account_steal_time(NULL, ticks); | 146 | account_steal_time(NULL, ticks); |
160 | 147 | ||
@@ -166,11 +153,7 @@ static void do_stolen_accounting(void) | |||
166 | if (blocked < 0) | 153 | if (blocked < 0) |
167 | blocked = 0; | 154 | blocked = 0; |
168 | 155 | ||
169 | ticks = 0; | 156 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
170 | while (blocked >= NS_PER_TICK) { | ||
171 | ticks++; | ||
172 | blocked -= NS_PER_TICK; | ||
173 | } | ||
174 | __get_cpu_var(residual_blocked) = blocked; | 157 | __get_cpu_var(residual_blocked) = blocked; |
175 | account_steal_time(idle_task(smp_processor_id()), ticks); | 158 | account_steal_time(idle_task(smp_processor_id()), ticks); |
176 | } | 159 | } |
@@ -218,7 +201,7 @@ unsigned long long xen_sched_clock(void) | |||
218 | unsigned long xen_cpu_khz(void) | 201 | unsigned long xen_cpu_khz(void) |
219 | { | 202 | { |
220 | u64 xen_khz = 1000000ULL << 32; | 203 | u64 xen_khz = 1000000ULL << 32; |
221 | const struct vcpu_time_info *info = | 204 | const struct pvclock_vcpu_time_info *info = |
222 | &HYPERVISOR_shared_info->vcpu_info[0].time; | 205 | &HYPERVISOR_shared_info->vcpu_info[0].time; |
223 | 206 | ||
224 | do_div(xen_khz, info->tsc_to_system_mul); | 207 | do_div(xen_khz, info->tsc_to_system_mul); |
@@ -230,121 +213,26 @@ unsigned long xen_cpu_khz(void) | |||
230 | return xen_khz; | 213 | return xen_khz; |
231 | } | 214 | } |
232 | 215 | ||
233 | /* | ||
234 | * Reads a consistent set of time-base values from Xen, into a shadow data | ||
235 | * area. | ||
236 | */ | ||
237 | static unsigned get_time_values_from_xen(void) | ||
238 | { | ||
239 | struct vcpu_time_info *src; | ||
240 | struct shadow_time_info *dst; | ||
241 | |||
242 | /* src is shared memory with the hypervisor, so we need to | ||
243 | make sure we get a consistent snapshot, even in the face of | ||
244 | being preempted. */ | ||
245 | src = &__get_cpu_var(xen_vcpu)->time; | ||
246 | dst = &__get_cpu_var(shadow_time); | ||
247 | |||
248 | do { | ||
249 | dst->version = src->version; | ||
250 | rmb(); /* fetch version before data */ | ||
251 | dst->tsc_timestamp = src->tsc_timestamp; | ||
252 | dst->system_timestamp = src->system_time; | ||
253 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
254 | dst->tsc_shift = src->tsc_shift; | ||
255 | rmb(); /* test version after fetching data */ | ||
256 | } while ((src->version & 1) | (dst->version ^ src->version)); | ||
257 | |||
258 | return dst->version; | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | ||
263 | * yielding a 64-bit result. | ||
264 | */ | ||
265 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
266 | { | ||
267 | u64 product; | ||
268 | #ifdef __i386__ | ||
269 | u32 tmp1, tmp2; | ||
270 | #endif | ||
271 | |||
272 | if (shift < 0) | ||
273 | delta >>= -shift; | ||
274 | else | ||
275 | delta <<= shift; | ||
276 | |||
277 | #ifdef __i386__ | ||
278 | __asm__ ( | ||
279 | "mul %5 ; " | ||
280 | "mov %4,%%eax ; " | ||
281 | "mov %%edx,%4 ; " | ||
282 | "mul %5 ; " | ||
283 | "xor %5,%5 ; " | ||
284 | "add %4,%%eax ; " | ||
285 | "adc %5,%%edx ; " | ||
286 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
287 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
288 | #elif __x86_64__ | ||
289 | __asm__ ( | ||
290 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
291 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
292 | #else | ||
293 | #error implement me! | ||
294 | #endif | ||
295 | |||
296 | return product; | ||
297 | } | ||
298 | |||
299 | static u64 get_nsec_offset(struct shadow_time_info *shadow) | ||
300 | { | ||
301 | u64 now, delta; | ||
302 | now = native_read_tsc(); | ||
303 | delta = now - shadow->tsc_timestamp; | ||
304 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
305 | } | ||
306 | |||
307 | static cycle_t xen_clocksource_read(void) | 216 | static cycle_t xen_clocksource_read(void) |
308 | { | 217 | { |
309 | struct shadow_time_info *shadow = &get_cpu_var(shadow_time); | 218 | struct pvclock_vcpu_time_info *src; |
310 | cycle_t ret; | 219 | cycle_t ret; |
311 | unsigned version; | ||
312 | |||
313 | do { | ||
314 | version = get_time_values_from_xen(); | ||
315 | barrier(); | ||
316 | ret = shadow->system_timestamp + get_nsec_offset(shadow); | ||
317 | barrier(); | ||
318 | } while (version != __get_cpu_var(xen_vcpu)->time.version); | ||
319 | |||
320 | put_cpu_var(shadow_time); | ||
321 | 220 | ||
221 | src = &get_cpu_var(xen_vcpu)->time; | ||
222 | ret = pvclock_clocksource_read(src); | ||
223 | put_cpu_var(xen_vcpu); | ||
322 | return ret; | 224 | return ret; |
323 | } | 225 | } |
324 | 226 | ||
325 | static void xen_read_wallclock(struct timespec *ts) | 227 | static void xen_read_wallclock(struct timespec *ts) |
326 | { | 228 | { |
327 | const struct shared_info *s = HYPERVISOR_shared_info; | 229 | struct shared_info *s = HYPERVISOR_shared_info; |
328 | u32 version; | 230 | struct pvclock_wall_clock *wall_clock = &(s->wc); |
329 | u64 delta; | 231 | struct pvclock_vcpu_time_info *vcpu_time; |
330 | struct timespec now; | ||
331 | |||
332 | /* get wallclock at system boot */ | ||
333 | do { | ||
334 | version = s->wc_version; | ||
335 | rmb(); /* fetch version before time */ | ||
336 | now.tv_sec = s->wc_sec; | ||
337 | now.tv_nsec = s->wc_nsec; | ||
338 | rmb(); /* fetch time before checking version */ | ||
339 | } while ((s->wc_version & 1) | (version ^ s->wc_version)); | ||
340 | 232 | ||
341 | delta = xen_clocksource_read(); /* time since system boot */ | 233 | vcpu_time = &get_cpu_var(xen_vcpu)->time; |
342 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | 234 | pvclock_read_wallclock(wall_clock, vcpu_time, ts); |
343 | 235 | put_cpu_var(xen_vcpu); | |
344 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
345 | now.tv_sec = delta; | ||
346 | |||
347 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | ||
348 | } | 236 | } |
349 | 237 | ||
350 | unsigned long xen_get_wallclock(void) | 238 | unsigned long xen_get_wallclock(void) |
@@ -352,7 +240,6 @@ unsigned long xen_get_wallclock(void) | |||
352 | struct timespec ts; | 240 | struct timespec ts; |
353 | 241 | ||
354 | xen_read_wallclock(&ts); | 242 | xen_read_wallclock(&ts); |
355 | |||
356 | return ts.tv_sec; | 243 | return ts.tv_sec; |
357 | } | 244 | } |
358 | 245 | ||
@@ -576,8 +463,6 @@ __init void xen_time_init(void) | |||
576 | { | 463 | { |
577 | int cpu = smp_processor_id(); | 464 | int cpu = smp_processor_id(); |
578 | 465 | ||
579 | get_time_values_from_xen(); | ||
580 | |||
581 | clocksource_register(&xen_clocksource); | 466 | clocksource_register(&xen_clocksource); |
582 | 467 | ||
583 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | 468 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587ce73c..6ec3b4f7719b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -17,7 +17,7 @@ ENTRY(startup_xen) | |||
17 | 17 | ||
18 | __FINIT | 18 | __FINIT |
19 | 19 | ||
20 | .pushsection .bss.page_aligned | 20 | .pushsection .text |
21 | .align PAGE_SIZE_asm | 21 | .align PAGE_SIZE_asm |
22 | ENTRY(hypercall_page) | 22 | ENTRY(hypercall_page) |
23 | .skip 0x1000 | 23 | .skip 0x1000 |
@@ -30,11 +30,7 @@ ENTRY(hypercall_page) | |||
30 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) | 30 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) |
31 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) | 31 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) |
32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
33 | #ifdef CONFIG_X86_PAE | ||
34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 33 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
35 | #else | ||
36 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") | ||
37 | #endif | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 34 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
39 | 35 | ||
40 | #endif /*CONFIG_XEN */ | 36 | #endif /*CONFIG_XEN */ |