diff options
| author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-10-16 14:51:29 -0400 |
|---|---|---|
| committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-10-16 14:51:29 -0400 |
| commit | 93b1eab3d29e7ea32ee583de3362da84db06ded8 (patch) | |
| tree | 8dc7eb61d4c65a48f9ce21a49e392f4967185cfd /drivers/lguest | |
| parent | ab9c232286c2b77be78441c2d8396500b045777e (diff) | |
paravirt: refactor struct paravirt_ops into smaller pv_*_ops
This patch refactors the paravirt_ops structure into groups of
functionally related ops:
pv_info - random info, rather than function entrypoints
pv_init_ops - functions used at boot time (some for module_init too)
pv_misc_ops - lazy mode, which didn't fit well anywhere else
pv_time_ops - time-related functions
pv_cpu_ops - various privileged instruction ops
pv_irq_ops - operations for managing interrupt state
pv_apic_ops - APIC operations
pv_mmu_ops - operations for managing pagetables
There are several motivations for this:
1. Some of these ops will be general to all x86, and some will be
i386/x86-64 specific. This makes it easier to share common stuff
while allowing separate implementations where needed.
2. At the moment we must export all of paravirt_ops, but modules only
need selected parts of it. This allows us to export on a case by case
basis (and also choose which export license we want to apply).
3. Functional groupings make things a bit more readable.
Struct paravirt_ops is now only used as a template to generate
patch-site identifiers, and to extract function pointers for inserting
into jmp/calls when patching. It is only instantiated when needed.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Zach Amsden <zach@vmware.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: "Glauber de Oliveira Costa" <glommer@gmail.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Diffstat (limited to 'drivers/lguest')
| -rw-r--r-- | drivers/lguest/core.c | 6 | ||||
| -rw-r--r-- | drivers/lguest/lguest.c | 124 | ||||
| -rw-r--r-- | drivers/lguest/lguest_bus.c | 2 |
3 files changed, 73 insertions, 59 deletions
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 4a315f08a567..a0788c12b392 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
| @@ -248,8 +248,8 @@ static void unmap_switcher(void) | |||
| 248 | } | 248 | } |
| 249 | 249 | ||
| 250 | /*H:130 Our Guest is usually so well behaved; it never tries to do things it | 250 | /*H:130 Our Guest is usually so well behaved; it never tries to do things it |
| 251 | * isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite | 251 | * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't |
| 252 | * complete, because it doesn't contain replacements for the Intel I/O | 252 | * quite complete, because it doesn't contain replacements for the Intel I/O |
| 253 | * instructions. As a result, the Guest sometimes fumbles across one during | 253 | * instructions. As a result, the Guest sometimes fumbles across one during |
| 254 | * the boot process as it probes for various things which are usually attached | 254 | * the boot process as it probes for various things which are usually attached |
| 255 | * to a PC. | 255 | * to a PC. |
| @@ -694,7 +694,7 @@ static int __init init(void) | |||
| 694 | 694 | ||
| 695 | /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ | 695 | /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ |
| 696 | if (paravirt_enabled()) { | 696 | if (paravirt_enabled()) { |
| 697 | printk("lguest is afraid of %s\n", paravirt_ops.name); | 697 | printk("lguest is afraid of %s\n", pv_info.name); |
| 698 | return -EPERM; | 698 | return -EPERM; |
| 699 | } | 699 | } |
| 700 | 700 | ||
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index ee1c6d05c3d3..ca9b844f37c2 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | * | 23 | * |
| 24 | * So how does the kernel know it's a Guest? The Guest starts at a special | 24 | * So how does the kernel know it's a Guest? The Guest starts at a special |
| 25 | * entry point marked with a magic string, which sets up a few things then | 25 | * entry point marked with a magic string, which sets up a few things then |
| 26 | * calls here. We replace the native functions in "struct paravirt_ops" | 26 | * calls here. We replace the native functions in various "paravirt" structures |
| 27 | * with our Guest versions, then boot like normal. :*/ | 27 | * with our Guest versions, then boot like normal. :*/ |
| 28 | 28 | ||
| 29 | /* | 29 | /* |
| @@ -331,7 +331,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) | |||
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | /*G:038 That's enough excitement for now, back to ploughing through each of | 333 | /*G:038 That's enough excitement for now, back to ploughing through each of |
| 334 | * the paravirt_ops (we're about 1/3 of the way through). | 334 | * the different pv_ops structures (we're about 1/3 of the way through). |
| 335 | * | 335 | * |
| 336 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only | 336 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only |
| 337 | * uses this for some strange applications like Wine. We don't do anything | 337 | * uses this for some strange applications like Wine. We don't do anything |
| @@ -558,7 +558,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) | |||
| 558 | lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); | 558 | lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); |
| 559 | } | 559 | } |
| 560 | 560 | ||
| 561 | /* Unfortunately for Lguest, the paravirt_ops for page tables were based on | 561 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on |
| 562 | * native page table operations. On native hardware you can set a new page | 562 | * native page table operations. On native hardware you can set a new page |
| 563 | * table entry whenever you want, but if you want to remove one you have to do | 563 | * table entry whenever you want, but if you want to remove one you have to do |
| 564 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). | 564 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). |
| @@ -782,7 +782,7 @@ static void lguest_time_init(void) | |||
| 782 | clocksource_register(&lguest_clock); | 782 | clocksource_register(&lguest_clock); |
| 783 | 783 | ||
| 784 | /* Now we've set up our clock, we can use it as the scheduler clock */ | 784 | /* Now we've set up our clock, we can use it as the scheduler clock */ |
| 785 | paravirt_ops.sched_clock = lguest_sched_clock; | 785 | pv_time_ops.sched_clock = lguest_sched_clock; |
| 786 | 786 | ||
| 787 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 787 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
| 788 | * here and register our timer device. */ | 788 | * here and register our timer device. */ |
| @@ -902,7 +902,7 @@ static __init char *lguest_memory_setup(void) | |||
| 902 | /*G:050 | 902 | /*G:050 |
| 903 | * Patching (Powerfully Placating Performance Pedants) | 903 | * Patching (Powerfully Placating Performance Pedants) |
| 904 | * | 904 | * |
| 905 | * We have already seen that "struct paravirt_ops" lets us replace simple | 905 | * We have already seen that pv_ops structures let us replace simple |
| 906 | * native instructions with calls to the appropriate back end all throughout | 906 | * native instructions with calls to the appropriate back end all throughout |
| 907 | * the kernel. This allows the same kernel to run as a Guest and as a native | 907 | * the kernel. This allows the same kernel to run as a Guest and as a native |
| 908 | * kernel, but it's slow because of all the indirect branches. | 908 | * kernel, but it's slow because of all the indirect branches. |
| @@ -927,10 +927,10 @@ static const struct lguest_insns | |||
| 927 | { | 927 | { |
| 928 | const char *start, *end; | 928 | const char *start, *end; |
| 929 | } lguest_insns[] = { | 929 | } lguest_insns[] = { |
| 930 | [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, | 930 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, |
| 931 | [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, | 931 | [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, |
| 932 | [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, | 932 | [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, |
| 933 | [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, | 933 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, |
| 934 | }; | 934 | }; |
| 935 | 935 | ||
| 936 | /* Now our patch routine is fairly simple (based on the native one in | 936 | /* Now our patch routine is fairly simple (based on the native one in |
| @@ -957,9 +957,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | |||
| 957 | return insn_len; | 957 | return insn_len; |
| 958 | } | 958 | } |
| 959 | 959 | ||
| 960 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops | 960 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops |
| 961 | * structure in the kernel provides a single point for (almost) every routine | 961 | * structures in the kernel provide points for (almost) every routine we have |
| 962 | * we have to override to avoid privileged instructions. */ | 962 | * to override to avoid privileged instructions. */ |
| 963 | __init void lguest_init(void *boot) | 963 | __init void lguest_init(void *boot) |
| 964 | { | 964 | { |
| 965 | /* Copy boot parameters first: the Launcher put the physical location | 965 | /* Copy boot parameters first: the Launcher put the physical location |
| @@ -974,54 +974,68 @@ __init void lguest_init(void *boot) | |||
| 974 | 974 | ||
| 975 | /* We're under lguest, paravirt is enabled, and we're running at | 975 | /* We're under lguest, paravirt is enabled, and we're running at |
| 976 | * privilege level 1, not 0 as normal. */ | 976 | * privilege level 1, not 0 as normal. */ |
| 977 | paravirt_ops.name = "lguest"; | 977 | pv_info.name = "lguest"; |
| 978 | paravirt_ops.paravirt_enabled = 1; | 978 | pv_info.paravirt_enabled = 1; |
| 979 | paravirt_ops.kernel_rpl = 1; | 979 | pv_info.kernel_rpl = 1; |
| 980 | 980 | ||
| 981 | /* We set up all the lguest overrides for sensitive operations. These | 981 | /* We set up all the lguest overrides for sensitive operations. These |
| 982 | * are detailed with the operations themselves. */ | 982 | * are detailed with the operations themselves. */ |
| 983 | paravirt_ops.save_fl = save_fl; | 983 | |
| 984 | paravirt_ops.restore_fl = restore_fl; | 984 | /* interrupt-related operations */ |
| 985 | paravirt_ops.irq_disable = irq_disable; | 985 | pv_irq_ops.init_IRQ = lguest_init_IRQ; |
| 986 | paravirt_ops.irq_enable = irq_enable; | 986 | pv_irq_ops.save_fl = save_fl; |
| 987 | paravirt_ops.load_gdt = lguest_load_gdt; | 987 | pv_irq_ops.restore_fl = restore_fl; |
| 988 | paravirt_ops.memory_setup = lguest_memory_setup; | 988 | pv_irq_ops.irq_disable = irq_disable; |
| 989 | paravirt_ops.cpuid = lguest_cpuid; | 989 | pv_irq_ops.irq_enable = irq_enable; |
| 990 | paravirt_ops.write_cr3 = lguest_write_cr3; | 990 | pv_irq_ops.safe_halt = lguest_safe_halt; |
| 991 | paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; | 991 | |
| 992 | paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; | 992 | /* init-time operations */ |
| 993 | paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; | 993 | pv_init_ops.memory_setup = lguest_memory_setup; |
| 994 | paravirt_ops.set_pte = lguest_set_pte; | 994 | pv_init_ops.patch = lguest_patch; |
| 995 | paravirt_ops.set_pte_at = lguest_set_pte_at; | 995 | |
| 996 | paravirt_ops.set_pmd = lguest_set_pmd; | 996 | /* Intercepts of various cpu instructions */ |
| 997 | pv_cpu_ops.load_gdt = lguest_load_gdt; | ||
| 998 | pv_cpu_ops.cpuid = lguest_cpuid; | ||
| 999 | pv_cpu_ops.load_idt = lguest_load_idt; | ||
| 1000 | pv_cpu_ops.iret = lguest_iret; | ||
| 1001 | pv_cpu_ops.load_esp0 = lguest_load_esp0; | ||
| 1002 | pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; | ||
| 1003 | pv_cpu_ops.set_ldt = lguest_set_ldt; | ||
| 1004 | pv_cpu_ops.load_tls = lguest_load_tls; | ||
| 1005 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; | ||
| 1006 | pv_cpu_ops.clts = lguest_clts; | ||
| 1007 | pv_cpu_ops.read_cr0 = lguest_read_cr0; | ||
| 1008 | pv_cpu_ops.write_cr0 = lguest_write_cr0; | ||
| 1009 | pv_cpu_ops.read_cr4 = lguest_read_cr4; | ||
| 1010 | pv_cpu_ops.write_cr4 = lguest_write_cr4; | ||
| 1011 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; | ||
| 1012 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; | ||
| 1013 | pv_cpu_ops.wbinvd = lguest_wbinvd; | ||
| 1014 | |||
| 1015 | /* pagetable management */ | ||
| 1016 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | ||
| 1017 | pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; | ||
| 1018 | pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; | ||
| 1019 | pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; | ||
| 1020 | pv_mmu_ops.set_pte = lguest_set_pte; | ||
| 1021 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; | ||
| 1022 | pv_mmu_ops.set_pmd = lguest_set_pmd; | ||
| 1023 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | ||
| 1024 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | ||
| 1025 | |||
| 997 | #ifdef CONFIG_X86_LOCAL_APIC | 1026 | #ifdef CONFIG_X86_LOCAL_APIC |
| 998 | paravirt_ops.apic_write = lguest_apic_write; | 1027 | /* apic read/write intercepts */ |
| 999 | paravirt_ops.apic_write_atomic = lguest_apic_write; | 1028 | pv_apic_ops.apic_write = lguest_apic_write; |
| 1000 | paravirt_ops.apic_read = lguest_apic_read; | 1029 | pv_apic_ops.apic_write_atomic = lguest_apic_write; |
| 1030 | pv_apic_ops.apic_read = lguest_apic_read; | ||
| 1001 | #endif | 1031 | #endif |
| 1002 | paravirt_ops.load_idt = lguest_load_idt; | 1032 | |
| 1003 | paravirt_ops.iret = lguest_iret; | 1033 | /* time operations */ |
| 1004 | paravirt_ops.load_esp0 = lguest_load_esp0; | 1034 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
| 1005 | paravirt_ops.load_tr_desc = lguest_load_tr_desc; | 1035 | pv_time_ops.time_init = lguest_time_init; |
| 1006 | paravirt_ops.set_ldt = lguest_set_ldt; | 1036 | |
| 1007 | paravirt_ops.load_tls = lguest_load_tls; | 1037 | pv_misc_ops.set_lazy_mode = lguest_lazy_mode; |
| 1008 | paravirt_ops.set_debugreg = lguest_set_debugreg; | 1038 | |
| 1009 | paravirt_ops.clts = lguest_clts; | ||
| 1010 | paravirt_ops.read_cr0 = lguest_read_cr0; | ||
| 1011 | paravirt_ops.write_cr0 = lguest_write_cr0; | ||
| 1012 | paravirt_ops.init_IRQ = lguest_init_IRQ; | ||
| 1013 | paravirt_ops.read_cr2 = lguest_read_cr2; | ||
| 1014 | paravirt_ops.read_cr3 = lguest_read_cr3; | ||
| 1015 | paravirt_ops.read_cr4 = lguest_read_cr4; | ||
| 1016 | paravirt_ops.write_cr4 = lguest_write_cr4; | ||
| 1017 | paravirt_ops.write_gdt_entry = lguest_write_gdt_entry; | ||
| 1018 | paravirt_ops.write_idt_entry = lguest_write_idt_entry; | ||
| 1019 | paravirt_ops.patch = lguest_patch; | ||
| 1020 | paravirt_ops.safe_halt = lguest_safe_halt; | ||
| 1021 | paravirt_ops.get_wallclock = lguest_get_wallclock; | ||
| 1022 | paravirt_ops.time_init = lguest_time_init; | ||
| 1023 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | ||
| 1024 | paravirt_ops.wbinvd = lguest_wbinvd; | ||
| 1025 | /* Now is a good time to look at the implementations of these functions | 1039 | /* Now is a good time to look at the implementations of these functions |
| 1026 | * before returning to the rest of lguest_init(). */ | 1040 | * before returning to the rest of lguest_init(). */ |
| 1027 | 1041 | ||
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c index 9e7752cc8002..57329788f8a7 100644 --- a/drivers/lguest/lguest_bus.c +++ b/drivers/lguest/lguest_bus.c | |||
| @@ -201,7 +201,7 @@ static void scan_devices(void) | |||
| 201 | * "struct lguest_device_desc" array. */ | 201 | * "struct lguest_device_desc" array. */ |
| 202 | static int __init lguest_bus_init(void) | 202 | static int __init lguest_bus_init(void) |
| 203 | { | 203 | { |
| 204 | if (strcmp(paravirt_ops.name, "lguest") != 0) | 204 | if (strcmp(pv_info.name, "lguest") != 0) |
| 205 | return 0; | 205 | return 0; |
| 206 | 206 | ||
| 207 | /* Devices are in a single page above top of "normal" mem */ | 207 | /* Devices are in a single page above top of "normal" mem */ |
