aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lguest/lguest.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/lguest/lguest.c')
-rw-r--r--drivers/lguest/lguest.c152
1 files changed, 78 insertions, 74 deletions
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 4a579c840301..3ba337dde857 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -23,7 +23,7 @@
23 * 23 *
24 * So how does the kernel know it's a Guest? The Guest starts at a special 24 * So how does the kernel know it's a Guest? The Guest starts at a special
25 * entry point marked with a magic string, which sets up a few things then 25 * entry point marked with a magic string, which sets up a few things then
26 * calls here. We replace the native functions in "struct paravirt_ops" 26 * calls here. We replace the native functions in various "paravirt" structures
27 * with our Guest versions, then boot like normal. :*/ 27 * with our Guest versions, then boot like normal. :*/
28 28
29/* 29/*
@@ -97,29 +97,17 @@ static cycle_t clock_base;
97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls 97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls
98 * are reasonably expensive, batching them up makes sense. For example, a 98 * are reasonably expensive, batching them up makes sense. For example, a
99 * large mmap might update dozens of page table entries: that code calls 99 * large mmap might update dozens of page table entries: that code calls
100 * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls 100 * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
101 * lguest_lazy_mode(PARAVIRT_LAZY_NONE). 101 * lguest_leave_lazy_mode().
102 * 102 *
103 * So, when we're in lazy mode, we call async_hypercall() to store the call for 103 * So, when we're in lazy mode, we call async_hypercall() to store the call for
104 * future processing. When lazy mode is turned off we issue a hypercall to 104 * future processing. When lazy mode is turned off we issue a hypercall to
105 * flush the stored calls. 105 * flush the stored calls.
106 * 106 */
107 * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which 107static void lguest_leave_lazy_mode(void)
108 * indicates we're to flush any outstanding calls immediately. This is used
109 * when an interrupt handler does a kmap_atomic(): the page table changes must
110 * happen immediately even if we're in the middle of a batch. Usually we're
111 * not, though, so there's nothing to do. */
112static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */
113static void lguest_lazy_mode(enum paravirt_lazy_mode mode)
114{ 108{
115 if (mode == PARAVIRT_LAZY_FLUSH) { 109 paravirt_leave_lazy(paravirt_get_lazy_mode());
116 if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE)) 110 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
117 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
118 } else {
119 lazy_mode = mode;
120 if (mode == PARAVIRT_LAZY_NONE)
121 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
122 }
123} 111}
124 112
125static void lazy_hcall(unsigned long call, 113static void lazy_hcall(unsigned long call,
@@ -127,7 +115,7 @@ static void lazy_hcall(unsigned long call,
127 unsigned long arg2, 115 unsigned long arg2,
128 unsigned long arg3) 116 unsigned long arg3)
129{ 117{
130 if (lazy_mode == PARAVIRT_LAZY_NONE) 118 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
131 hcall(call, arg1, arg2, arg3); 119 hcall(call, arg1, arg2, arg3);
132 else 120 else
133 async_hcall(call, arg1, arg2, arg3); 121 async_hcall(call, arg1, arg2, arg3);
@@ -331,7 +319,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
331} 319}
332 320
333/*G:038 That's enough excitement for now, back to ploughing through each of 321/*G:038 That's enough excitement for now, back to ploughing through each of
334 * the paravirt_ops (we're about 1/3 of the way through). 322 * the different pv_ops structures (we're about 1/3 of the way through).
335 * 323 *
336 * This is the Local Descriptor Table, another weird Intel thingy. Linux only 324 * This is the Local Descriptor Table, another weird Intel thingy. Linux only
337 * uses this for some strange applications like Wine. We don't do anything 325 * uses this for some strange applications like Wine. We don't do anything
@@ -558,7 +546,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
558 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 546 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
559} 547}
560 548
561/* Unfortunately for Lguest, the paravirt_ops for page tables were based on 549/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
562 * native page table operations. On native hardware you can set a new page 550 * native page table operations. On native hardware you can set a new page
563 * table entry whenever you want, but if you want to remove one you have to do 551 * table entry whenever you want, but if you want to remove one you have to do
564 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). 552 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
@@ -782,7 +770,7 @@ static void lguest_time_init(void)
782 clocksource_register(&lguest_clock); 770 clocksource_register(&lguest_clock);
783 771
784 /* Now we've set up our clock, we can use it as the scheduler clock */ 772 /* Now we've set up our clock, we can use it as the scheduler clock */
785 paravirt_ops.sched_clock = lguest_sched_clock; 773 pv_time_ops.sched_clock = lguest_sched_clock;
786 774
787 /* We can't set cpumask in the initializer: damn C limitations! Set it 775 /* We can't set cpumask in the initializer: damn C limitations! Set it
788 * here and register our timer device. */ 776 * here and register our timer device. */
@@ -904,7 +892,7 @@ static __init char *lguest_memory_setup(void)
904/*G:050 892/*G:050
905 * Patching (Powerfully Placating Performance Pedants) 893 * Patching (Powerfully Placating Performance Pedants)
906 * 894 *
907 * We have already seen that "struct paravirt_ops" lets us replace simple 895 * We have already seen that pv_ops structures let us replace simple
908 * native instructions with calls to the appropriate back end all throughout 896 * native instructions with calls to the appropriate back end all throughout
909 * the kernel. This allows the same kernel to run as a Guest and as a native 897 * the kernel. This allows the same kernel to run as a Guest and as a native
910 * kernel, but it's slow because of all the indirect branches. 898 * kernel, but it's slow because of all the indirect branches.
@@ -929,10 +917,10 @@ static const struct lguest_insns
929{ 917{
930 const char *start, *end; 918 const char *start, *end;
931} lguest_insns[] = { 919} lguest_insns[] = {
932 [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, 920 [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
933 [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, 921 [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
934 [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, 922 [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
935 [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, 923 [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
936}; 924};
937 925
938/* Now our patch routine is fairly simple (based on the native one in 926/* Now our patch routine is fairly simple (based on the native one in
@@ -959,9 +947,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
959 return insn_len; 947 return insn_len;
960} 948}
961 949
962/*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops 950/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
963 * structure in the kernel provides a single point for (almost) every routine 951 * structures in the kernel provide points for (almost) every routine we have
964 * we have to override to avoid privileged instructions. */ 952 * to override to avoid privileged instructions. */
965__init void lguest_init(void *boot) 953__init void lguest_init(void *boot)
966{ 954{
967 /* Copy boot parameters first: the Launcher put the physical location 955 /* Copy boot parameters first: the Launcher put the physical location
@@ -976,54 +964,70 @@ __init void lguest_init(void *boot)
976 964
977 /* We're under lguest, paravirt is enabled, and we're running at 965 /* We're under lguest, paravirt is enabled, and we're running at
978 * privilege level 1, not 0 as normal. */ 966 * privilege level 1, not 0 as normal. */
979 paravirt_ops.name = "lguest"; 967 pv_info.name = "lguest";
980 paravirt_ops.paravirt_enabled = 1; 968 pv_info.paravirt_enabled = 1;
981 paravirt_ops.kernel_rpl = 1; 969 pv_info.kernel_rpl = 1;
982 970
983 /* We set up all the lguest overrides for sensitive operations. These 971 /* We set up all the lguest overrides for sensitive operations. These
984 * are detailed with the operations themselves. */ 972 * are detailed with the operations themselves. */
985 paravirt_ops.save_fl = save_fl; 973
986 paravirt_ops.restore_fl = restore_fl; 974 /* interrupt-related operations */
987 paravirt_ops.irq_disable = irq_disable; 975 pv_irq_ops.init_IRQ = lguest_init_IRQ;
988 paravirt_ops.irq_enable = irq_enable; 976 pv_irq_ops.save_fl = save_fl;
989 paravirt_ops.load_gdt = lguest_load_gdt; 977 pv_irq_ops.restore_fl = restore_fl;
990 paravirt_ops.memory_setup = lguest_memory_setup; 978 pv_irq_ops.irq_disable = irq_disable;
991 paravirt_ops.cpuid = lguest_cpuid; 979 pv_irq_ops.irq_enable = irq_enable;
992 paravirt_ops.write_cr3 = lguest_write_cr3; 980 pv_irq_ops.safe_halt = lguest_safe_halt;
993 paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; 981
994 paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; 982 /* init-time operations */
995 paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; 983 pv_init_ops.memory_setup = lguest_memory_setup;
996 paravirt_ops.set_pte = lguest_set_pte; 984 pv_init_ops.patch = lguest_patch;
997 paravirt_ops.set_pte_at = lguest_set_pte_at; 985
998 paravirt_ops.set_pmd = lguest_set_pmd; 986 /* Intercepts of various cpu instructions */
987 pv_cpu_ops.load_gdt = lguest_load_gdt;
988 pv_cpu_ops.cpuid = lguest_cpuid;
989 pv_cpu_ops.load_idt = lguest_load_idt;
990 pv_cpu_ops.iret = lguest_iret;
991 pv_cpu_ops.load_esp0 = lguest_load_esp0;
992 pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
993 pv_cpu_ops.set_ldt = lguest_set_ldt;
994 pv_cpu_ops.load_tls = lguest_load_tls;
995 pv_cpu_ops.set_debugreg = lguest_set_debugreg;
996 pv_cpu_ops.clts = lguest_clts;
997 pv_cpu_ops.read_cr0 = lguest_read_cr0;
998 pv_cpu_ops.write_cr0 = lguest_write_cr0;
999 pv_cpu_ops.read_cr4 = lguest_read_cr4;
1000 pv_cpu_ops.write_cr4 = lguest_write_cr4;
1001 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
1002 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
1003 pv_cpu_ops.wbinvd = lguest_wbinvd;
1004 pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
1005 pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1006
1007 /* pagetable management */
1008 pv_mmu_ops.write_cr3 = lguest_write_cr3;
1009 pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
1010 pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
1011 pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
1012 pv_mmu_ops.set_pte = lguest_set_pte;
1013 pv_mmu_ops.set_pte_at = lguest_set_pte_at;
1014 pv_mmu_ops.set_pmd = lguest_set_pmd;
1015 pv_mmu_ops.read_cr2 = lguest_read_cr2;
1016 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1017 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1018 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1019
999#ifdef CONFIG_X86_LOCAL_APIC 1020#ifdef CONFIG_X86_LOCAL_APIC
1000 paravirt_ops.apic_write = lguest_apic_write; 1021 /* apic read/write intercepts */
1001 paravirt_ops.apic_write_atomic = lguest_apic_write; 1022 pv_apic_ops.apic_write = lguest_apic_write;
1002 paravirt_ops.apic_read = lguest_apic_read; 1023 pv_apic_ops.apic_write_atomic = lguest_apic_write;
1024 pv_apic_ops.apic_read = lguest_apic_read;
1003#endif 1025#endif
1004 paravirt_ops.load_idt = lguest_load_idt; 1026
1005 paravirt_ops.iret = lguest_iret; 1027 /* time operations */
1006 paravirt_ops.load_esp0 = lguest_load_esp0; 1028 pv_time_ops.get_wallclock = lguest_get_wallclock;
1007 paravirt_ops.load_tr_desc = lguest_load_tr_desc; 1029 pv_time_ops.time_init = lguest_time_init;
1008 paravirt_ops.set_ldt = lguest_set_ldt; 1030
1009 paravirt_ops.load_tls = lguest_load_tls;
1010 paravirt_ops.set_debugreg = lguest_set_debugreg;
1011 paravirt_ops.clts = lguest_clts;
1012 paravirt_ops.read_cr0 = lguest_read_cr0;
1013 paravirt_ops.write_cr0 = lguest_write_cr0;
1014 paravirt_ops.init_IRQ = lguest_init_IRQ;
1015 paravirt_ops.read_cr2 = lguest_read_cr2;
1016 paravirt_ops.read_cr3 = lguest_read_cr3;
1017 paravirt_ops.read_cr4 = lguest_read_cr4;
1018 paravirt_ops.write_cr4 = lguest_write_cr4;
1019 paravirt_ops.write_gdt_entry = lguest_write_gdt_entry;
1020 paravirt_ops.write_idt_entry = lguest_write_idt_entry;
1021 paravirt_ops.patch = lguest_patch;
1022 paravirt_ops.safe_halt = lguest_safe_halt;
1023 paravirt_ops.get_wallclock = lguest_get_wallclock;
1024 paravirt_ops.time_init = lguest_time_init;
1025 paravirt_ops.set_lazy_mode = lguest_lazy_mode;
1026 paravirt_ops.wbinvd = lguest_wbinvd;
1027 /* Now is a good time to look at the implementations of these functions 1031 /* Now is a good time to look at the implementations of these functions
1028 * before returning to the rest of lguest_init(). */ 1032 * before returning to the rest of lguest_init(). */
1029 1033