diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-24 12:06:47 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-24 12:06:47 -0400 |
commit | c61264f98c1a974ee6f545f61a4ab33b141d6bda (patch) | |
tree | 490e82c3bbf1403098b582f589eec378be727d4c /arch/x86/xen | |
parent | a23a334bd547e9462d9ca4a74608519a1e928848 (diff) | |
parent | bd9ddc875b6659f9f74dcfd285c472bc58041abd (diff) |
Merge branch 'upstream/xen-tracing2' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'upstream/xen-tracing2' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
xen/trace: use class for multicall trace
xen/trace: convert mmu events to use DECLARE_EVENT_CLASS()/DEFINE_EVENT()
xen/multicall: move *idx fields to start of mc_buffer
xen/multicall: special-case singleton hypercalls
xen/multicalls: add unlikely around slowpath in __xen_mc_entry()
xen/multicalls: disable MC_DEBUG
xen/mmu: tune pgtable alloc/release
xen/mmu: use extend_args for more mmuext updates
xen/trace: add tlb flush tracepoints
xen/trace: add segment desc tracing
xen/trace: add xen_pgd_(un)pin tracepoints
xen/trace: add ptpage alloc/release tracepoints
xen/trace: add mmu tracepoints
xen/trace: add multicall tracing
xen/trace: set up tracepoint skeleton
xen/multicalls: remove debugfs stats
trace/xen: add skeleton for Xen trace events
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 16 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 139 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.c | 169 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.h | 6 | ||||
-rw-r--r-- | arch/x86/xen/trace.c | 61 |
6 files changed, 243 insertions, 150 deletions
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index a6575b949b1..ccf73b2f3e6 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp) | |||
13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
14 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
15 | grant-table.o suspend.o platform-pci-unplug.o \ | 15 | grant-table.o suspend.o platform-pci-unplug.o \ |
16 | p2m.o | 16 | p2m.o trace.o |
17 | 17 | ||
18 | obj-$(CONFIG_SMP) += smp.o | 18 | obj-$(CONFIG_SMP) += smp.o |
19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 53257421082..974a528458a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -341,6 +341,8 @@ static void xen_set_ldt(const void *addr, unsigned entries) | |||
341 | struct mmuext_op *op; | 341 | struct mmuext_op *op; |
342 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | 342 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); |
343 | 343 | ||
344 | trace_xen_cpu_set_ldt(addr, entries); | ||
345 | |||
344 | op = mcs.args; | 346 | op = mcs.args; |
345 | op->cmd = MMUEXT_SET_LDT; | 347 | op->cmd = MMUEXT_SET_LDT; |
346 | op->arg1.linear_addr = (unsigned long)addr; | 348 | op->arg1.linear_addr = (unsigned long)addr; |
@@ -496,6 +498,8 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
496 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); | 498 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); |
497 | u64 entry = *(u64 *)ptr; | 499 | u64 entry = *(u64 *)ptr; |
498 | 500 | ||
501 | trace_xen_cpu_write_ldt_entry(dt, entrynum, entry); | ||
502 | |||
499 | preempt_disable(); | 503 | preempt_disable(); |
500 | 504 | ||
501 | xen_mc_flush(); | 505 | xen_mc_flush(); |
@@ -565,6 +569,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) | |||
565 | unsigned long p = (unsigned long)&dt[entrynum]; | 569 | unsigned long p = (unsigned long)&dt[entrynum]; |
566 | unsigned long start, end; | 570 | unsigned long start, end; |
567 | 571 | ||
572 | trace_xen_cpu_write_idt_entry(dt, entrynum, g); | ||
573 | |||
568 | preempt_disable(); | 574 | preempt_disable(); |
569 | 575 | ||
570 | start = __this_cpu_read(idt_desc.address); | 576 | start = __this_cpu_read(idt_desc.address); |
@@ -619,6 +625,8 @@ static void xen_load_idt(const struct desc_ptr *desc) | |||
619 | static DEFINE_SPINLOCK(lock); | 625 | static DEFINE_SPINLOCK(lock); |
620 | static struct trap_info traps[257]; | 626 | static struct trap_info traps[257]; |
621 | 627 | ||
628 | trace_xen_cpu_load_idt(desc); | ||
629 | |||
622 | spin_lock(&lock); | 630 | spin_lock(&lock); |
623 | 631 | ||
624 | __get_cpu_var(idt_desc) = *desc; | 632 | __get_cpu_var(idt_desc) = *desc; |
@@ -637,6 +645,8 @@ static void xen_load_idt(const struct desc_ptr *desc) | |||
637 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | 645 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, |
638 | const void *desc, int type) | 646 | const void *desc, int type) |
639 | { | 647 | { |
648 | trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); | ||
649 | |||
640 | preempt_disable(); | 650 | preempt_disable(); |
641 | 651 | ||
642 | switch (type) { | 652 | switch (type) { |
@@ -665,6 +675,8 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
665 | static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | 675 | static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, |
666 | const void *desc, int type) | 676 | const void *desc, int type) |
667 | { | 677 | { |
678 | trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); | ||
679 | |||
668 | switch (type) { | 680 | switch (type) { |
669 | case DESC_LDT: | 681 | case DESC_LDT: |
670 | case DESC_TSS: | 682 | case DESC_TSS: |
@@ -684,7 +696,9 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | |||
684 | static void xen_load_sp0(struct tss_struct *tss, | 696 | static void xen_load_sp0(struct tss_struct *tss, |
685 | struct thread_struct *thread) | 697 | struct thread_struct *thread) |
686 | { | 698 | { |
687 | struct multicall_space mcs = xen_mc_entry(0); | 699 | struct multicall_space mcs; |
700 | |||
701 | mcs = xen_mc_entry(0); | ||
688 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 702 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
689 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 703 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
690 | } | 704 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0ccccb67a99..f987bde77c4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -48,6 +48,8 @@ | |||
48 | #include <linux/memblock.h> | 48 | #include <linux/memblock.h> |
49 | #include <linux/seq_file.h> | 49 | #include <linux/seq_file.h> |
50 | 50 | ||
51 | #include <trace/events/xen.h> | ||
52 | |||
51 | #include <asm/pgtable.h> | 53 | #include <asm/pgtable.h> |
52 | #include <asm/tlbflush.h> | 54 | #include <asm/tlbflush.h> |
53 | #include <asm/fixmap.h> | 55 | #include <asm/fixmap.h> |
@@ -194,6 +196,8 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) | |||
194 | struct multicall_space mcs; | 196 | struct multicall_space mcs; |
195 | struct mmu_update *u; | 197 | struct mmu_update *u; |
196 | 198 | ||
199 | trace_xen_mmu_set_domain_pte(ptep, pteval, domid); | ||
200 | |||
197 | mcs = xen_mc_entry(sizeof(*u)); | 201 | mcs = xen_mc_entry(sizeof(*u)); |
198 | u = mcs.args; | 202 | u = mcs.args; |
199 | 203 | ||
@@ -225,6 +229,24 @@ static void xen_extend_mmu_update(const struct mmu_update *update) | |||
225 | *u = *update; | 229 | *u = *update; |
226 | } | 230 | } |
227 | 231 | ||
232 | static void xen_extend_mmuext_op(const struct mmuext_op *op) | ||
233 | { | ||
234 | struct multicall_space mcs; | ||
235 | struct mmuext_op *u; | ||
236 | |||
237 | mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u)); | ||
238 | |||
239 | if (mcs.mc != NULL) { | ||
240 | mcs.mc->args[1]++; | ||
241 | } else { | ||
242 | mcs = __xen_mc_entry(sizeof(*u)); | ||
243 | MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
244 | } | ||
245 | |||
246 | u = mcs.args; | ||
247 | *u = *op; | ||
248 | } | ||
249 | |||
228 | static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | 250 | static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) |
229 | { | 251 | { |
230 | struct mmu_update u; | 252 | struct mmu_update u; |
@@ -245,6 +267,8 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
245 | 267 | ||
246 | static void xen_set_pmd(pmd_t *ptr, pmd_t val) | 268 | static void xen_set_pmd(pmd_t *ptr, pmd_t val) |
247 | { | 269 | { |
270 | trace_xen_mmu_set_pmd(ptr, val); | ||
271 | |||
248 | /* If page is not pinned, we can just update the entry | 272 | /* If page is not pinned, we can just update the entry |
249 | directly */ | 273 | directly */ |
250 | if (!xen_page_pinned(ptr)) { | 274 | if (!xen_page_pinned(ptr)) { |
@@ -282,22 +306,30 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
282 | return true; | 306 | return true; |
283 | } | 307 | } |
284 | 308 | ||
285 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
286 | { | 310 | { |
287 | if (!xen_batched_set_pte(ptep, pteval)) | 311 | if (!xen_batched_set_pte(ptep, pteval)) |
288 | native_set_pte(ptep, pteval); | 312 | native_set_pte(ptep, pteval); |
289 | } | 313 | } |
290 | 314 | ||
315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | ||
316 | { | ||
317 | trace_xen_mmu_set_pte(ptep, pteval); | ||
318 | __xen_set_pte(ptep, pteval); | ||
319 | } | ||
320 | |||
291 | static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 321 | static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
292 | pte_t *ptep, pte_t pteval) | 322 | pte_t *ptep, pte_t pteval) |
293 | { | 323 | { |
294 | xen_set_pte(ptep, pteval); | 324 | trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval); |
325 | __xen_set_pte(ptep, pteval); | ||
295 | } | 326 | } |
296 | 327 | ||
297 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 328 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
298 | unsigned long addr, pte_t *ptep) | 329 | unsigned long addr, pte_t *ptep) |
299 | { | 330 | { |
300 | /* Just return the pte as-is. We preserve the bits on commit */ | 331 | /* Just return the pte as-is. We preserve the bits on commit */ |
332 | trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep); | ||
301 | return *ptep; | 333 | return *ptep; |
302 | } | 334 | } |
303 | 335 | ||
@@ -306,6 +338,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
306 | { | 338 | { |
307 | struct mmu_update u; | 339 | struct mmu_update u; |
308 | 340 | ||
341 | trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte); | ||
309 | xen_mc_batch(); | 342 | xen_mc_batch(); |
310 | 343 | ||
311 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 344 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
@@ -530,6 +563,8 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
530 | 563 | ||
531 | static void xen_set_pud(pud_t *ptr, pud_t val) | 564 | static void xen_set_pud(pud_t *ptr, pud_t val) |
532 | { | 565 | { |
566 | trace_xen_mmu_set_pud(ptr, val); | ||
567 | |||
533 | /* If page is not pinned, we can just update the entry | 568 | /* If page is not pinned, we can just update the entry |
534 | directly */ | 569 | directly */ |
535 | if (!xen_page_pinned(ptr)) { | 570 | if (!xen_page_pinned(ptr)) { |
@@ -543,17 +578,20 @@ static void xen_set_pud(pud_t *ptr, pud_t val) | |||
543 | #ifdef CONFIG_X86_PAE | 578 | #ifdef CONFIG_X86_PAE |
544 | static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | 579 | static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
545 | { | 580 | { |
581 | trace_xen_mmu_set_pte_atomic(ptep, pte); | ||
546 | set_64bit((u64 *)ptep, native_pte_val(pte)); | 582 | set_64bit((u64 *)ptep, native_pte_val(pte)); |
547 | } | 583 | } |
548 | 584 | ||
549 | static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 585 | static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
550 | { | 586 | { |
587 | trace_xen_mmu_pte_clear(mm, addr, ptep); | ||
551 | if (!xen_batched_set_pte(ptep, native_make_pte(0))) | 588 | if (!xen_batched_set_pte(ptep, native_make_pte(0))) |
552 | native_pte_clear(mm, addr, ptep); | 589 | native_pte_clear(mm, addr, ptep); |
553 | } | 590 | } |
554 | 591 | ||
555 | static void xen_pmd_clear(pmd_t *pmdp) | 592 | static void xen_pmd_clear(pmd_t *pmdp) |
556 | { | 593 | { |
594 | trace_xen_mmu_pmd_clear(pmdp); | ||
557 | set_pmd(pmdp, __pmd(0)); | 595 | set_pmd(pmdp, __pmd(0)); |
558 | } | 596 | } |
559 | #endif /* CONFIG_X86_PAE */ | 597 | #endif /* CONFIG_X86_PAE */ |
@@ -629,6 +667,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
629 | { | 667 | { |
630 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 668 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
631 | 669 | ||
670 | trace_xen_mmu_set_pgd(ptr, user_ptr, val); | ||
671 | |||
632 | /* If page is not pinned, we can just update the entry | 672 | /* If page is not pinned, we can just update the entry |
633 | directly */ | 673 | directly */ |
634 | if (!xen_page_pinned(ptr)) { | 674 | if (!xen_page_pinned(ptr)) { |
@@ -788,14 +828,12 @@ static void xen_pte_unlock(void *v) | |||
788 | 828 | ||
789 | static void xen_do_pin(unsigned level, unsigned long pfn) | 829 | static void xen_do_pin(unsigned level, unsigned long pfn) |
790 | { | 830 | { |
791 | struct mmuext_op *op; | 831 | struct mmuext_op op; |
792 | struct multicall_space mcs; | ||
793 | 832 | ||
794 | mcs = __xen_mc_entry(sizeof(*op)); | 833 | op.cmd = level; |
795 | op = mcs.args; | 834 | op.arg1.mfn = pfn_to_mfn(pfn); |
796 | op->cmd = level; | 835 | |
797 | op->arg1.mfn = pfn_to_mfn(pfn); | 836 | xen_extend_mmuext_op(&op); |
798 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
799 | } | 837 | } |
800 | 838 | ||
801 | static int xen_pin_page(struct mm_struct *mm, struct page *page, | 839 | static int xen_pin_page(struct mm_struct *mm, struct page *page, |
@@ -863,6 +901,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, | |||
863 | read-only, and can be pinned. */ | 901 | read-only, and can be pinned. */ |
864 | static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) | 902 | static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) |
865 | { | 903 | { |
904 | trace_xen_mmu_pgd_pin(mm, pgd); | ||
905 | |||
866 | xen_mc_batch(); | 906 | xen_mc_batch(); |
867 | 907 | ||
868 | if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { | 908 | if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { |
@@ -988,6 +1028,8 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page, | |||
988 | /* Release a pagetables pages back as normal RW */ | 1028 | /* Release a pagetables pages back as normal RW */ |
989 | static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) | 1029 | static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) |
990 | { | 1030 | { |
1031 | trace_xen_mmu_pgd_unpin(mm, pgd); | ||
1032 | |||
991 | xen_mc_batch(); | 1033 | xen_mc_batch(); |
992 | 1034 | ||
993 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 1035 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
@@ -1196,6 +1238,8 @@ static void xen_flush_tlb(void) | |||
1196 | struct mmuext_op *op; | 1238 | struct mmuext_op *op; |
1197 | struct multicall_space mcs; | 1239 | struct multicall_space mcs; |
1198 | 1240 | ||
1241 | trace_xen_mmu_flush_tlb(0); | ||
1242 | |||
1199 | preempt_disable(); | 1243 | preempt_disable(); |
1200 | 1244 | ||
1201 | mcs = xen_mc_entry(sizeof(*op)); | 1245 | mcs = xen_mc_entry(sizeof(*op)); |
@@ -1214,6 +1258,8 @@ static void xen_flush_tlb_single(unsigned long addr) | |||
1214 | struct mmuext_op *op; | 1258 | struct mmuext_op *op; |
1215 | struct multicall_space mcs; | 1259 | struct multicall_space mcs; |
1216 | 1260 | ||
1261 | trace_xen_mmu_flush_tlb_single(addr); | ||
1262 | |||
1217 | preempt_disable(); | 1263 | preempt_disable(); |
1218 | 1264 | ||
1219 | mcs = xen_mc_entry(sizeof(*op)); | 1265 | mcs = xen_mc_entry(sizeof(*op)); |
@@ -1240,6 +1286,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1240 | } *args; | 1286 | } *args; |
1241 | struct multicall_space mcs; | 1287 | struct multicall_space mcs; |
1242 | 1288 | ||
1289 | trace_xen_mmu_flush_tlb_others(cpus, mm, va); | ||
1290 | |||
1243 | if (cpumask_empty(cpus)) | 1291 | if (cpumask_empty(cpus)) |
1244 | return; /* nothing to do */ | 1292 | return; /* nothing to do */ |
1245 | 1293 | ||
@@ -1275,10 +1323,11 @@ static void set_current_cr3(void *v) | |||
1275 | 1323 | ||
1276 | static void __xen_write_cr3(bool kernel, unsigned long cr3) | 1324 | static void __xen_write_cr3(bool kernel, unsigned long cr3) |
1277 | { | 1325 | { |
1278 | struct mmuext_op *op; | 1326 | struct mmuext_op op; |
1279 | struct multicall_space mcs; | ||
1280 | unsigned long mfn; | 1327 | unsigned long mfn; |
1281 | 1328 | ||
1329 | trace_xen_mmu_write_cr3(kernel, cr3); | ||
1330 | |||
1282 | if (cr3) | 1331 | if (cr3) |
1283 | mfn = pfn_to_mfn(PFN_DOWN(cr3)); | 1332 | mfn = pfn_to_mfn(PFN_DOWN(cr3)); |
1284 | else | 1333 | else |
@@ -1286,13 +1335,10 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) | |||
1286 | 1335 | ||
1287 | WARN_ON(mfn == 0 && kernel); | 1336 | WARN_ON(mfn == 0 && kernel); |
1288 | 1337 | ||
1289 | mcs = __xen_mc_entry(sizeof(*op)); | 1338 | op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; |
1290 | 1339 | op.arg1.mfn = mfn; | |
1291 | op = mcs.args; | ||
1292 | op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; | ||
1293 | op->arg1.mfn = mfn; | ||
1294 | 1340 | ||
1295 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 1341 | xen_extend_mmuext_op(&op); |
1296 | 1342 | ||
1297 | if (kernel) { | 1343 | if (kernel) { |
1298 | percpu_write(xen_cr3, cr3); | 1344 | percpu_write(xen_cr3, cr3); |
@@ -1451,19 +1497,52 @@ static void __init xen_release_pmd_init(unsigned long pfn) | |||
1451 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1497 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1452 | } | 1498 | } |
1453 | 1499 | ||
1500 | static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | ||
1501 | { | ||
1502 | struct multicall_space mcs; | ||
1503 | struct mmuext_op *op; | ||
1504 | |||
1505 | mcs = __xen_mc_entry(sizeof(*op)); | ||
1506 | op = mcs.args; | ||
1507 | op->cmd = cmd; | ||
1508 | op->arg1.mfn = pfn_to_mfn(pfn); | ||
1509 | |||
1510 | MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
1511 | } | ||
1512 | |||
1513 | static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) | ||
1514 | { | ||
1515 | struct multicall_space mcs; | ||
1516 | unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
1517 | |||
1518 | mcs = __xen_mc_entry(0); | ||
1519 | MULTI_update_va_mapping(mcs.mc, (unsigned long)addr, | ||
1520 | pfn_pte(pfn, prot), 0); | ||
1521 | } | ||
1522 | |||
1454 | /* This needs to make sure the new pte page is pinned iff its being | 1523 | /* This needs to make sure the new pte page is pinned iff its being |
1455 | attached to a pinned pagetable. */ | 1524 | attached to a pinned pagetable. */ |
1456 | static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) | 1525 | static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, |
1526 | unsigned level) | ||
1457 | { | 1527 | { |
1458 | struct page *page = pfn_to_page(pfn); | 1528 | bool pinned = PagePinned(virt_to_page(mm->pgd)); |
1529 | |||
1530 | trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); | ||
1531 | |||
1532 | if (pinned) { | ||
1533 | struct page *page = pfn_to_page(pfn); | ||
1459 | 1534 | ||
1460 | if (PagePinned(virt_to_page(mm->pgd))) { | ||
1461 | SetPagePinned(page); | 1535 | SetPagePinned(page); |
1462 | 1536 | ||
1463 | if (!PageHighMem(page)) { | 1537 | if (!PageHighMem(page)) { |
1464 | make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); | 1538 | xen_mc_batch(); |
1539 | |||
1540 | __set_pfn_prot(pfn, PAGE_KERNEL_RO); | ||
1541 | |||
1465 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) | 1542 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
1466 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | 1543 | __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
1544 | |||
1545 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1467 | } else { | 1546 | } else { |
1468 | /* make sure there are no stray mappings of | 1547 | /* make sure there are no stray mappings of |
1469 | this page */ | 1548 | this page */ |
@@ -1483,15 +1562,23 @@ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) | |||
1483 | } | 1562 | } |
1484 | 1563 | ||
1485 | /* This should never happen until we're OK to use struct page */ | 1564 | /* This should never happen until we're OK to use struct page */ |
1486 | static void xen_release_ptpage(unsigned long pfn, unsigned level) | 1565 | static inline void xen_release_ptpage(unsigned long pfn, unsigned level) |
1487 | { | 1566 | { |
1488 | struct page *page = pfn_to_page(pfn); | 1567 | struct page *page = pfn_to_page(pfn); |
1568 | bool pinned = PagePinned(page); | ||
1489 | 1569 | ||
1490 | if (PagePinned(page)) { | 1570 | trace_xen_mmu_release_ptpage(pfn, level, pinned); |
1571 | |||
1572 | if (pinned) { | ||
1491 | if (!PageHighMem(page)) { | 1573 | if (!PageHighMem(page)) { |
1574 | xen_mc_batch(); | ||
1575 | |||
1492 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) | 1576 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
1493 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 1577 | __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
1494 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1578 | |
1579 | __set_pfn_prot(pfn, PAGE_KERNEL); | ||
1580 | |||
1581 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1495 | } | 1582 | } |
1496 | ClearPagePinned(page); | 1583 | ClearPagePinned(page); |
1497 | } | 1584 | } |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 1b2b73ff0a6..0d82003e76a 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -30,12 +30,13 @@ | |||
30 | 30 | ||
31 | #define MC_BATCH 32 | 31 | #define MC_BATCH 32 |
32 | 32 | ||
33 | #define MC_DEBUG 1 | 33 | #define MC_DEBUG 0 |
34 | 34 | ||
35 | #define MC_ARGS (MC_BATCH * 16) | 35 | #define MC_ARGS (MC_BATCH * 16) |
36 | 36 | ||
37 | 37 | ||
38 | struct mc_buffer { | 38 | struct mc_buffer { |
39 | unsigned mcidx, argidx, cbidx; | ||
39 | struct multicall_entry entries[MC_BATCH]; | 40 | struct multicall_entry entries[MC_BATCH]; |
40 | #if MC_DEBUG | 41 | #if MC_DEBUG |
41 | struct multicall_entry debug[MC_BATCH]; | 42 | struct multicall_entry debug[MC_BATCH]; |
@@ -46,85 +47,15 @@ struct mc_buffer { | |||
46 | void (*fn)(void *); | 47 | void (*fn)(void *); |
47 | void *data; | 48 | void *data; |
48 | } callbacks[MC_BATCH]; | 49 | } callbacks[MC_BATCH]; |
49 | unsigned mcidx, argidx, cbidx; | ||
50 | }; | 50 | }; |
51 | 51 | ||
52 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); | 52 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); |
53 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | 53 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); |
54 | 54 | ||
55 | /* flush reasons 0- slots, 1- args, 2- callbacks */ | ||
56 | enum flush_reasons | ||
57 | { | ||
58 | FL_SLOTS, | ||
59 | FL_ARGS, | ||
60 | FL_CALLBACKS, | ||
61 | |||
62 | FL_N_REASONS | ||
63 | }; | ||
64 | |||
65 | #ifdef CONFIG_XEN_DEBUG_FS | ||
66 | #define NHYPERCALLS 40 /* not really */ | ||
67 | |||
68 | static struct { | ||
69 | unsigned histo[MC_BATCH+1]; | ||
70 | |||
71 | unsigned issued; | ||
72 | unsigned arg_total; | ||
73 | unsigned hypercalls; | ||
74 | unsigned histo_hypercalls[NHYPERCALLS]; | ||
75 | |||
76 | unsigned flush[FL_N_REASONS]; | ||
77 | } mc_stats; | ||
78 | |||
79 | static u8 zero_stats; | ||
80 | |||
81 | static inline void check_zero(void) | ||
82 | { | ||
83 | if (unlikely(zero_stats)) { | ||
84 | memset(&mc_stats, 0, sizeof(mc_stats)); | ||
85 | zero_stats = 0; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | static void mc_add_stats(const struct mc_buffer *mc) | ||
90 | { | ||
91 | int i; | ||
92 | |||
93 | check_zero(); | ||
94 | |||
95 | mc_stats.issued++; | ||
96 | mc_stats.hypercalls += mc->mcidx; | ||
97 | mc_stats.arg_total += mc->argidx; | ||
98 | |||
99 | mc_stats.histo[mc->mcidx]++; | ||
100 | for(i = 0; i < mc->mcidx; i++) { | ||
101 | unsigned op = mc->entries[i].op; | ||
102 | if (op < NHYPERCALLS) | ||
103 | mc_stats.histo_hypercalls[op]++; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static void mc_stats_flush(enum flush_reasons idx) | ||
108 | { | ||
109 | check_zero(); | ||
110 | |||
111 | mc_stats.flush[idx]++; | ||
112 | } | ||
113 | |||
114 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
115 | |||
116 | static inline void mc_add_stats(const struct mc_buffer *mc) | ||
117 | { | ||
118 | } | ||
119 | |||
120 | static inline void mc_stats_flush(enum flush_reasons idx) | ||
121 | { | ||
122 | } | ||
123 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
124 | |||
125 | void xen_mc_flush(void) | 55 | void xen_mc_flush(void) |
126 | { | 56 | { |
127 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 57 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
58 | struct multicall_entry *mc; | ||
128 | int ret = 0; | 59 | int ret = 0; |
129 | unsigned long flags; | 60 | unsigned long flags; |
130 | int i; | 61 | int i; |
@@ -135,9 +66,26 @@ void xen_mc_flush(void) | |||
135 | something in the middle */ | 66 | something in the middle */ |
136 | local_irq_save(flags); | 67 | local_irq_save(flags); |
137 | 68 | ||
138 | mc_add_stats(b); | 69 | trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); |
70 | |||
71 | switch (b->mcidx) { | ||
72 | case 0: | ||
73 | /* no-op */ | ||
74 | BUG_ON(b->argidx != 0); | ||
75 | break; | ||
76 | |||
77 | case 1: | ||
78 | /* Singleton multicall - bypass multicall machinery | ||
79 | and just do the call directly. */ | ||
80 | mc = &b->entries[0]; | ||
81 | |||
82 | mc->result = privcmd_call(mc->op, | ||
83 | mc->args[0], mc->args[1], mc->args[2], | ||
84 | mc->args[3], mc->args[4]); | ||
85 | ret = mc->result < 0; | ||
86 | break; | ||
139 | 87 | ||
140 | if (b->mcidx) { | 88 | default: |
141 | #if MC_DEBUG | 89 | #if MC_DEBUG |
142 | memcpy(b->debug, b->entries, | 90 | memcpy(b->debug, b->entries, |
143 | b->mcidx * sizeof(struct multicall_entry)); | 91 | b->mcidx * sizeof(struct multicall_entry)); |
@@ -164,11 +112,10 @@ void xen_mc_flush(void) | |||
164 | } | 112 | } |
165 | } | 113 | } |
166 | #endif | 114 | #endif |
115 | } | ||
167 | 116 | ||
168 | b->mcidx = 0; | 117 | b->mcidx = 0; |
169 | b->argidx = 0; | 118 | b->argidx = 0; |
170 | } else | ||
171 | BUG_ON(b->argidx != 0); | ||
172 | 119 | ||
173 | for (i = 0; i < b->cbidx; i++) { | 120 | for (i = 0; i < b->cbidx; i++) { |
174 | struct callback *cb = &b->callbacks[i]; | 121 | struct callback *cb = &b->callbacks[i]; |
@@ -188,18 +135,21 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
188 | struct multicall_space ret; | 135 | struct multicall_space ret; |
189 | unsigned argidx = roundup(b->argidx, sizeof(u64)); | 136 | unsigned argidx = roundup(b->argidx, sizeof(u64)); |
190 | 137 | ||
138 | trace_xen_mc_entry_alloc(args); | ||
139 | |||
191 | BUG_ON(preemptible()); | 140 | BUG_ON(preemptible()); |
192 | BUG_ON(b->argidx >= MC_ARGS); | 141 | BUG_ON(b->argidx >= MC_ARGS); |
193 | 142 | ||
194 | if (b->mcidx == MC_BATCH || | 143 | if (unlikely(b->mcidx == MC_BATCH || |
195 | (argidx + args) >= MC_ARGS) { | 144 | (argidx + args) >= MC_ARGS)) { |
196 | mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); | 145 | trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ? |
146 | XEN_MC_FL_BATCH : XEN_MC_FL_ARGS); | ||
197 | xen_mc_flush(); | 147 | xen_mc_flush(); |
198 | argidx = roundup(b->argidx, sizeof(u64)); | 148 | argidx = roundup(b->argidx, sizeof(u64)); |
199 | } | 149 | } |
200 | 150 | ||
201 | ret.mc = &b->entries[b->mcidx]; | 151 | ret.mc = &b->entries[b->mcidx]; |
202 | #ifdef MC_DEBUG | 152 | #if MC_DEBUG |
203 | b->caller[b->mcidx] = __builtin_return_address(0); | 153 | b->caller[b->mcidx] = __builtin_return_address(0); |
204 | #endif | 154 | #endif |
205 | b->mcidx++; | 155 | b->mcidx++; |
@@ -218,20 +168,25 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) | |||
218 | BUG_ON(preemptible()); | 168 | BUG_ON(preemptible()); |
219 | BUG_ON(b->argidx >= MC_ARGS); | 169 | BUG_ON(b->argidx >= MC_ARGS); |
220 | 170 | ||
221 | if (b->mcidx == 0) | 171 | if (unlikely(b->mcidx == 0 || |
222 | return ret; | 172 | b->entries[b->mcidx - 1].op != op)) { |
223 | 173 | trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP); | |
224 | if (b->entries[b->mcidx - 1].op != op) | 174 | goto out; |
225 | return ret; | 175 | } |
226 | 176 | ||
227 | if ((b->argidx + size) >= MC_ARGS) | 177 | if (unlikely((b->argidx + size) >= MC_ARGS)) { |
228 | return ret; | 178 | trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE); |
179 | goto out; | ||
180 | } | ||
229 | 181 | ||
230 | ret.mc = &b->entries[b->mcidx - 1]; | 182 | ret.mc = &b->entries[b->mcidx - 1]; |
231 | ret.args = &b->args[b->argidx]; | 183 | ret.args = &b->args[b->argidx]; |
232 | b->argidx += size; | 184 | b->argidx += size; |
233 | 185 | ||
234 | BUG_ON(b->argidx >= MC_ARGS); | 186 | BUG_ON(b->argidx >= MC_ARGS); |
187 | |||
188 | trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK); | ||
189 | out: | ||
235 | return ret; | 190 | return ret; |
236 | } | 191 | } |
237 | 192 | ||
@@ -241,43 +196,13 @@ void xen_mc_callback(void (*fn)(void *), void *data) | |||
241 | struct callback *cb; | 196 | struct callback *cb; |
242 | 197 | ||
243 | if (b->cbidx == MC_BATCH) { | 198 | if (b->cbidx == MC_BATCH) { |
244 | mc_stats_flush(FL_CALLBACKS); | 199 | trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK); |
245 | xen_mc_flush(); | 200 | xen_mc_flush(); |
246 | } | 201 | } |
247 | 202 | ||
203 | trace_xen_mc_callback(fn, data); | ||
204 | |||
248 | cb = &b->callbacks[b->cbidx++]; | 205 | cb = &b->callbacks[b->cbidx++]; |
249 | cb->fn = fn; | 206 | cb->fn = fn; |
250 | cb->data = data; | 207 | cb->data = data; |
251 | } | 208 | } |
252 | |||
253 | #ifdef CONFIG_XEN_DEBUG_FS | ||
254 | |||
255 | static struct dentry *d_mc_debug; | ||
256 | |||
257 | static int __init xen_mc_debugfs(void) | ||
258 | { | ||
259 | struct dentry *d_xen = xen_init_debugfs(); | ||
260 | |||
261 | if (d_xen == NULL) | ||
262 | return -ENOMEM; | ||
263 | |||
264 | d_mc_debug = debugfs_create_dir("multicalls", d_xen); | ||
265 | |||
266 | debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats); | ||
267 | |||
268 | debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued); | ||
269 | debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls); | ||
270 | debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total); | ||
271 | |||
272 | xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug, | ||
273 | mc_stats.histo, MC_BATCH); | ||
274 | xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug, | ||
275 | mc_stats.histo_hypercalls, NHYPERCALLS); | ||
276 | xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug, | ||
277 | mc_stats.flush, FL_N_REASONS); | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | fs_initcall(xen_mc_debugfs); | ||
282 | |||
283 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 4ec8035e321..dee79b78a90 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _XEN_MULTICALLS_H | 1 | #ifndef _XEN_MULTICALLS_H |
2 | #define _XEN_MULTICALLS_H | 2 | #define _XEN_MULTICALLS_H |
3 | 3 | ||
4 | #include <trace/events/xen.h> | ||
5 | |||
4 | #include "xen-ops.h" | 6 | #include "xen-ops.h" |
5 | 7 | ||
6 | /* Multicalls */ | 8 | /* Multicalls */ |
@@ -20,8 +22,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); | |||
20 | static inline void xen_mc_batch(void) | 22 | static inline void xen_mc_batch(void) |
21 | { | 23 | { |
22 | unsigned long flags; | 24 | unsigned long flags; |
25 | |||
23 | /* need to disable interrupts until this entry is complete */ | 26 | /* need to disable interrupts until this entry is complete */ |
24 | local_irq_save(flags); | 27 | local_irq_save(flags); |
28 | trace_xen_mc_batch(paravirt_get_lazy_mode()); | ||
25 | __this_cpu_write(xen_mc_irq_flags, flags); | 29 | __this_cpu_write(xen_mc_irq_flags, flags); |
26 | } | 30 | } |
27 | 31 | ||
@@ -37,6 +41,8 @@ void xen_mc_flush(void); | |||
37 | /* Issue a multicall if we're not in a lazy mode */ | 41 | /* Issue a multicall if we're not in a lazy mode */ |
38 | static inline void xen_mc_issue(unsigned mode) | 42 | static inline void xen_mc_issue(unsigned mode) |
39 | { | 43 | { |
44 | trace_xen_mc_issue(mode); | ||
45 | |||
40 | if ((paravirt_get_lazy_mode() & mode) == 0) | 46 | if ((paravirt_get_lazy_mode() & mode) == 0) |
41 | xen_mc_flush(); | 47 | xen_mc_flush(); |
42 | 48 | ||
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c new file mode 100644 index 00000000000..734beba2a08 --- /dev/null +++ b/arch/x86/xen/trace.c | |||
@@ -0,0 +1,61 @@ | |||
1 | #include <linux/ftrace.h> | ||
2 | |||
3 | #define N(x) [__HYPERVISOR_##x] = "("#x")" | ||
4 | static const char *xen_hypercall_names[] = { | ||
5 | N(set_trap_table), | ||
6 | N(mmu_update), | ||
7 | N(set_gdt), | ||
8 | N(stack_switch), | ||
9 | N(set_callbacks), | ||
10 | N(fpu_taskswitch), | ||
11 | N(sched_op_compat), | ||
12 | N(dom0_op), | ||
13 | N(set_debugreg), | ||
14 | N(get_debugreg), | ||
15 | N(update_descriptor), | ||
16 | N(memory_op), | ||
17 | N(multicall), | ||
18 | N(update_va_mapping), | ||
19 | N(set_timer_op), | ||
20 | N(event_channel_op_compat), | ||
21 | N(xen_version), | ||
22 | N(console_io), | ||
23 | N(physdev_op_compat), | ||
24 | N(grant_table_op), | ||
25 | N(vm_assist), | ||
26 | N(update_va_mapping_otherdomain), | ||
27 | N(iret), | ||
28 | N(vcpu_op), | ||
29 | N(set_segment_base), | ||
30 | N(mmuext_op), | ||
31 | N(acm_op), | ||
32 | N(nmi_op), | ||
33 | N(sched_op), | ||
34 | N(callback_op), | ||
35 | N(xenoprof_op), | ||
36 | N(event_channel_op), | ||
37 | N(physdev_op), | ||
38 | N(hvm_op), | ||
39 | |||
40 | /* Architecture-specific hypercall definitions. */ | ||
41 | N(arch_0), | ||
42 | N(arch_1), | ||
43 | N(arch_2), | ||
44 | N(arch_3), | ||
45 | N(arch_4), | ||
46 | N(arch_5), | ||
47 | N(arch_6), | ||
48 | N(arch_7), | ||
49 | }; | ||
50 | #undef N | ||
51 | |||
52 | static const char *xen_hypercall_name(unsigned op) | ||
53 | { | ||
54 | if (op < ARRAY_SIZE(xen_hypercall_names) && xen_hypercall_names[op] != NULL) | ||
55 | return xen_hypercall_names[op]; | ||
56 | |||
57 | return ""; | ||
58 | } | ||
59 | |||
60 | #define CREATE_TRACE_POINTS | ||
61 | #include <trace/events/xen.h> | ||