diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-10-16 14:51:29 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-10-16 14:51:29 -0400 |
commit | 93b1eab3d29e7ea32ee583de3362da84db06ded8 (patch) | |
tree | 8dc7eb61d4c65a48f9ce21a49e392f4967185cfd /drivers/lguest/lguest.c | |
parent | ab9c232286c2b77be78441c2d8396500b045777e (diff) |
paravirt: refactor struct paravirt_ops into smaller pv_*_ops
This patch refactors the paravirt_ops structure into groups of
functionally related ops:
pv_info - random info, rather than function entrypoints
pv_init_ops - functions used at boot time (some for module_init too)
pv_misc_ops - lazy mode, which didn't fit well anywhere else
pv_time_ops - time-related functions
pv_cpu_ops - various privileged instruction ops
pv_irq_ops - operations for managing interrupt state
pv_apic_ops - APIC operations
pv_mmu_ops - operations for managing pagetables
There are several motivations for this:
1. Some of these ops will be general to all x86, and some will be
i386/x86-64 specific. This makes it easier to share common stuff
while allowing separate implementations where needed.
2. At the moment we must export all of paravirt_ops, but modules only
need selected parts of it. This allows us to export on a case by case
basis (and also choose which export license we want to apply).
3. Functional groupings make things a bit more readable.
Struct paravirt_ops is now only used as a template to generate
patch-site identifiers, and to extract function pointers for inserting
into jmp/calls when patching. It is only instantiated when needed.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Zach Amsden <zach@vmware.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: "Glauber de Oliveira Costa" <glommer@gmail.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Diffstat (limited to 'drivers/lguest/lguest.c')
-rw-r--r-- | drivers/lguest/lguest.c | 124 |
1 files changed, 69 insertions, 55 deletions
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index ee1c6d05c3d..ca9b844f37c 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -23,7 +23,7 @@ | |||
23 | * | 23 | * |
24 | * So how does the kernel know it's a Guest? The Guest starts at a special | 24 | * So how does the kernel know it's a Guest? The Guest starts at a special |
25 | * entry point marked with a magic string, which sets up a few things then | 25 | * entry point marked with a magic string, which sets up a few things then |
26 | * calls here. We replace the native functions in "struct paravirt_ops" | 26 | * calls here. We replace the native functions in various "paravirt" structures |
27 | * with our Guest versions, then boot like normal. :*/ | 27 | * with our Guest versions, then boot like normal. :*/ |
28 | 28 | ||
29 | /* | 29 | /* |
@@ -331,7 +331,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) | |||
331 | } | 331 | } |
332 | 332 | ||
333 | /*G:038 That's enough excitement for now, back to ploughing through each of | 333 | /*G:038 That's enough excitement for now, back to ploughing through each of |
334 | * the paravirt_ops (we're about 1/3 of the way through). | 334 | * the different pv_ops structures (we're about 1/3 of the way through). |
335 | * | 335 | * |
336 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only | 336 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only |
337 | * uses this for some strange applications like Wine. We don't do anything | 337 | * uses this for some strange applications like Wine. We don't do anything |
@@ -558,7 +558,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) | |||
558 | lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); | 558 | lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); |
559 | } | 559 | } |
560 | 560 | ||
561 | /* Unfortunately for Lguest, the paravirt_ops for page tables were based on | 561 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on |
562 | * native page table operations. On native hardware you can set a new page | 562 | * native page table operations. On native hardware you can set a new page |
563 | * table entry whenever you want, but if you want to remove one you have to do | 563 | * table entry whenever you want, but if you want to remove one you have to do |
564 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). | 564 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). |
@@ -782,7 +782,7 @@ static void lguest_time_init(void) | |||
782 | clocksource_register(&lguest_clock); | 782 | clocksource_register(&lguest_clock); |
783 | 783 | ||
784 | /* Now we've set up our clock, we can use it as the scheduler clock */ | 784 | /* Now we've set up our clock, we can use it as the scheduler clock */ |
785 | paravirt_ops.sched_clock = lguest_sched_clock; | 785 | pv_time_ops.sched_clock = lguest_sched_clock; |
786 | 786 | ||
787 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 787 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
788 | * here and register our timer device. */ | 788 | * here and register our timer device. */ |
@@ -902,7 +902,7 @@ static __init char *lguest_memory_setup(void) | |||
902 | /*G:050 | 902 | /*G:050 |
903 | * Patching (Powerfully Placating Performance Pedants) | 903 | * Patching (Powerfully Placating Performance Pedants) |
904 | * | 904 | * |
905 | * We have already seen that "struct paravirt_ops" lets us replace simple | 905 | * We have already seen that pv_ops structures let us replace simple |
906 | * native instructions with calls to the appropriate back end all throughout | 906 | * native instructions with calls to the appropriate back end all throughout |
907 | * the kernel. This allows the same kernel to run as a Guest and as a native | 907 | * the kernel. This allows the same kernel to run as a Guest and as a native |
908 | * kernel, but it's slow because of all the indirect branches. | 908 | * kernel, but it's slow because of all the indirect branches. |
@@ -927,10 +927,10 @@ static const struct lguest_insns | |||
927 | { | 927 | { |
928 | const char *start, *end; | 928 | const char *start, *end; |
929 | } lguest_insns[] = { | 929 | } lguest_insns[] = { |
930 | [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, | 930 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, |
931 | [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, | 931 | [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, |
932 | [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, | 932 | [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, |
933 | [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, | 933 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, |
934 | }; | 934 | }; |
935 | 935 | ||
936 | /* Now our patch routine is fairly simple (based on the native one in | 936 | /* Now our patch routine is fairly simple (based on the native one in |
@@ -957,9 +957,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | |||
957 | return insn_len; | 957 | return insn_len; |
958 | } | 958 | } |
959 | 959 | ||
960 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops | 960 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops |
961 | * structure in the kernel provides a single point for (almost) every routine | 961 | * structures in the kernel provide points for (almost) every routine we have |
962 | * we have to override to avoid privileged instructions. */ | 962 | * to override to avoid privileged instructions. */ |
963 | __init void lguest_init(void *boot) | 963 | __init void lguest_init(void *boot) |
964 | { | 964 | { |
965 | /* Copy boot parameters first: the Launcher put the physical location | 965 | /* Copy boot parameters first: the Launcher put the physical location |
@@ -974,54 +974,68 @@ __init void lguest_init(void *boot) | |||
974 | 974 | ||
975 | /* We're under lguest, paravirt is enabled, and we're running at | 975 | /* We're under lguest, paravirt is enabled, and we're running at |
976 | * privilege level 1, not 0 as normal. */ | 976 | * privilege level 1, not 0 as normal. */ |
977 | paravirt_ops.name = "lguest"; | 977 | pv_info.name = "lguest"; |
978 | paravirt_ops.paravirt_enabled = 1; | 978 | pv_info.paravirt_enabled = 1; |
979 | paravirt_ops.kernel_rpl = 1; | 979 | pv_info.kernel_rpl = 1; |
980 | 980 | ||
981 | /* We set up all the lguest overrides for sensitive operations. These | 981 | /* We set up all the lguest overrides for sensitive operations. These |
982 | * are detailed with the operations themselves. */ | 982 | * are detailed with the operations themselves. */ |
983 | paravirt_ops.save_fl = save_fl; | 983 | |
984 | paravirt_ops.restore_fl = restore_fl; | 984 | /* interrupt-related operations */ |
985 | paravirt_ops.irq_disable = irq_disable; | 985 | pv_irq_ops.init_IRQ = lguest_init_IRQ; |
986 | paravirt_ops.irq_enable = irq_enable; | 986 | pv_irq_ops.save_fl = save_fl; |
987 | paravirt_ops.load_gdt = lguest_load_gdt; | 987 | pv_irq_ops.restore_fl = restore_fl; |
988 | paravirt_ops.memory_setup = lguest_memory_setup; | 988 | pv_irq_ops.irq_disable = irq_disable; |
989 | paravirt_ops.cpuid = lguest_cpuid; | 989 | pv_irq_ops.irq_enable = irq_enable; |
990 | paravirt_ops.write_cr3 = lguest_write_cr3; | 990 | pv_irq_ops.safe_halt = lguest_safe_halt; |
991 | paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; | 991 | |
992 | paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; | 992 | /* init-time operations */ |
993 | paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; | 993 | pv_init_ops.memory_setup = lguest_memory_setup; |
994 | paravirt_ops.set_pte = lguest_set_pte; | 994 | pv_init_ops.patch = lguest_patch; |
995 | paravirt_ops.set_pte_at = lguest_set_pte_at; | 995 | |
996 | paravirt_ops.set_pmd = lguest_set_pmd; | 996 | /* Intercepts of various cpu instructions */ |
997 | pv_cpu_ops.load_gdt = lguest_load_gdt; | ||
998 | pv_cpu_ops.cpuid = lguest_cpuid; | ||
999 | pv_cpu_ops.load_idt = lguest_load_idt; | ||
1000 | pv_cpu_ops.iret = lguest_iret; | ||
1001 | pv_cpu_ops.load_esp0 = lguest_load_esp0; | ||
1002 | pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; | ||
1003 | pv_cpu_ops.set_ldt = lguest_set_ldt; | ||
1004 | pv_cpu_ops.load_tls = lguest_load_tls; | ||
1005 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; | ||
1006 | pv_cpu_ops.clts = lguest_clts; | ||
1007 | pv_cpu_ops.read_cr0 = lguest_read_cr0; | ||
1008 | pv_cpu_ops.write_cr0 = lguest_write_cr0; | ||
1009 | pv_cpu_ops.read_cr4 = lguest_read_cr4; | ||
1010 | pv_cpu_ops.write_cr4 = lguest_write_cr4; | ||
1011 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; | ||
1012 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; | ||
1013 | pv_cpu_ops.wbinvd = lguest_wbinvd; | ||
1014 | |||
1015 | /* pagetable management */ | ||
1016 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | ||
1017 | pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; | ||
1018 | pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; | ||
1019 | pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; | ||
1020 | pv_mmu_ops.set_pte = lguest_set_pte; | ||
1021 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; | ||
1022 | pv_mmu_ops.set_pmd = lguest_set_pmd; | ||
1023 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | ||
1024 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | ||
1025 | |||
997 | #ifdef CONFIG_X86_LOCAL_APIC | 1026 | #ifdef CONFIG_X86_LOCAL_APIC |
998 | paravirt_ops.apic_write = lguest_apic_write; | 1027 | /* apic read/write intercepts */ |
999 | paravirt_ops.apic_write_atomic = lguest_apic_write; | 1028 | pv_apic_ops.apic_write = lguest_apic_write; |
1000 | paravirt_ops.apic_read = lguest_apic_read; | 1029 | pv_apic_ops.apic_write_atomic = lguest_apic_write; |
1030 | pv_apic_ops.apic_read = lguest_apic_read; | ||
1001 | #endif | 1031 | #endif |
1002 | paravirt_ops.load_idt = lguest_load_idt; | 1032 | |
1003 | paravirt_ops.iret = lguest_iret; | 1033 | /* time operations */ |
1004 | paravirt_ops.load_esp0 = lguest_load_esp0; | 1034 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
1005 | paravirt_ops.load_tr_desc = lguest_load_tr_desc; | 1035 | pv_time_ops.time_init = lguest_time_init; |
1006 | paravirt_ops.set_ldt = lguest_set_ldt; | 1036 | |
1007 | paravirt_ops.load_tls = lguest_load_tls; | 1037 | pv_misc_ops.set_lazy_mode = lguest_lazy_mode; |
1008 | paravirt_ops.set_debugreg = lguest_set_debugreg; | 1038 | |
1009 | paravirt_ops.clts = lguest_clts; | ||
1010 | paravirt_ops.read_cr0 = lguest_read_cr0; | ||
1011 | paravirt_ops.write_cr0 = lguest_write_cr0; | ||
1012 | paravirt_ops.init_IRQ = lguest_init_IRQ; | ||
1013 | paravirt_ops.read_cr2 = lguest_read_cr2; | ||
1014 | paravirt_ops.read_cr3 = lguest_read_cr3; | ||
1015 | paravirt_ops.read_cr4 = lguest_read_cr4; | ||
1016 | paravirt_ops.write_cr4 = lguest_write_cr4; | ||
1017 | paravirt_ops.write_gdt_entry = lguest_write_gdt_entry; | ||
1018 | paravirt_ops.write_idt_entry = lguest_write_idt_entry; | ||
1019 | paravirt_ops.patch = lguest_patch; | ||
1020 | paravirt_ops.safe_halt = lguest_safe_halt; | ||
1021 | paravirt_ops.get_wallclock = lguest_get_wallclock; | ||
1022 | paravirt_ops.time_init = lguest_time_init; | ||
1023 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | ||
1024 | paravirt_ops.wbinvd = lguest_wbinvd; | ||
1025 | /* Now is a good time to look at the implementations of these functions | 1039 | /* Now is a good time to look at the implementations of these functions |
1026 | * before returning to the rest of lguest_init(). */ | 1040 | * before returning to the rest of lguest_init(). */ |
1027 | 1041 | ||