diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-17 14:10:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-17 14:10:11 -0400 |
commit | fb9fc395174138983a49f2da982ed14caabbe741 (patch) | |
tree | 5d5d3643ee6853a899205613da272cc343fdc1a4 /drivers/lguest | |
parent | 0eafaae84e21ac033815cc9f33c3ae889cd7ccfe (diff) | |
parent | ace2e92e193126711cb3a83a3752b2c5b8396950 (diff) |
Merge branch 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
xfs: eagerly remove vmap mappings to avoid upsetting Xen
xen: add some debug output for failed multicalls
xen: fix incorrect vcpu_register_vcpu_info hypercall argument
xen: ask the hypervisor how much space it needs reserved
xen: lock pte pages while pinning/unpinning
xen: deal with stale cr3 values when unpinning pagetables
xen: add batch completion callbacks
xen: yield to IPI target if necessary
Clean up duplicate includes in arch/i386/xen/
remove dead code in pgtable_cache_init
paravirt: clean up lazy mode handling
paravirt: refactor struct paravirt_ops into smaller pv_*_ops
Diffstat (limited to 'drivers/lguest')
-rw-r--r-- | drivers/lguest/core.c | 6 | ||||
-rw-r--r-- | drivers/lguest/lguest.c | 152 | ||||
-rw-r--r-- | drivers/lguest/lguest_bus.c | 2 |
3 files changed, 82 insertions, 78 deletions
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 4a315f08a567..a0788c12b392 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c | |||
@@ -248,8 +248,8 @@ static void unmap_switcher(void) | |||
248 | } | 248 | } |
249 | 249 | ||
250 | /*H:130 Our Guest is usually so well behaved; it never tries to do things it | 250 | /*H:130 Our Guest is usually so well behaved; it never tries to do things it |
251 | * isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite | 251 | * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't |
252 | * complete, because it doesn't contain replacements for the Intel I/O | 252 | * quite complete, because it doesn't contain replacements for the Intel I/O |
253 | * instructions. As a result, the Guest sometimes fumbles across one during | 253 | * instructions. As a result, the Guest sometimes fumbles across one during |
254 | * the boot process as it probes for various things which are usually attached | 254 | * the boot process as it probes for various things which are usually attached |
255 | * to a PC. | 255 | * to a PC. |
@@ -694,7 +694,7 @@ static int __init init(void) | |||
694 | 694 | ||
695 | /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ | 695 | /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ |
696 | if (paravirt_enabled()) { | 696 | if (paravirt_enabled()) { |
697 | printk("lguest is afraid of %s\n", paravirt_ops.name); | 697 | printk("lguest is afraid of %s\n", pv_info.name); |
698 | return -EPERM; | 698 | return -EPERM; |
699 | } | 699 | } |
700 | 700 | ||
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 4a579c840301..3ba337dde857 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c | |||
@@ -23,7 +23,7 @@ | |||
23 | * | 23 | * |
24 | * So how does the kernel know it's a Guest? The Guest starts at a special | 24 | * So how does the kernel know it's a Guest? The Guest starts at a special |
25 | * entry point marked with a magic string, which sets up a few things then | 25 | * entry point marked with a magic string, which sets up a few things then |
26 | * calls here. We replace the native functions in "struct paravirt_ops" | 26 | * calls here. We replace the native functions in various "paravirt" structures |
27 | * with our Guest versions, then boot like normal. :*/ | 27 | * with our Guest versions, then boot like normal. :*/ |
28 | 28 | ||
29 | /* | 29 | /* |
@@ -97,29 +97,17 @@ static cycle_t clock_base; | |||
97 | * them as a batch when lazy_mode is eventually turned off. Because hypercalls | 97 | * them as a batch when lazy_mode is eventually turned off. Because hypercalls |
98 | * are reasonably expensive, batching them up makes sense. For example, a | 98 | * are reasonably expensive, batching them up makes sense. For example, a |
99 | * large mmap might update dozens of page table entries: that code calls | 99 | * large mmap might update dozens of page table entries: that code calls |
100 | * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls | 100 | * paravirt_enter_lazy_mmu(), does the dozen updates, then calls |
101 | * lguest_lazy_mode(PARAVIRT_LAZY_NONE). | 101 | * lguest_leave_lazy_mode(). |
102 | * | 102 | * |
103 | * So, when we're in lazy mode, we call async_hypercall() to store the call for | 103 | * So, when we're in lazy mode, we call async_hypercall() to store the call for |
104 | * future processing. When lazy mode is turned off we issue a hypercall to | 104 | * future processing. When lazy mode is turned off we issue a hypercall to |
105 | * flush the stored calls. | 105 | * flush the stored calls. |
106 | * | 106 | */ |
107 | * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which | 107 | static void lguest_leave_lazy_mode(void) |
108 | * indicates we're to flush any outstanding calls immediately. This is used | ||
109 | * when an interrupt handler does a kmap_atomic(): the page table changes must | ||
110 | * happen immediately even if we're in the middle of a batch. Usually we're | ||
111 | * not, though, so there's nothing to do. */ | ||
112 | static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */ | ||
113 | static void lguest_lazy_mode(enum paravirt_lazy_mode mode) | ||
114 | { | 108 | { |
115 | if (mode == PARAVIRT_LAZY_FLUSH) { | 109 | paravirt_leave_lazy(paravirt_get_lazy_mode()); |
116 | if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE)) | 110 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); |
117 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); | ||
118 | } else { | ||
119 | lazy_mode = mode; | ||
120 | if (mode == PARAVIRT_LAZY_NONE) | ||
121 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); | ||
122 | } | ||
123 | } | 111 | } |
124 | 112 | ||
125 | static void lazy_hcall(unsigned long call, | 113 | static void lazy_hcall(unsigned long call, |
@@ -127,7 +115,7 @@ static void lazy_hcall(unsigned long call, | |||
127 | unsigned long arg2, | 115 | unsigned long arg2, |
128 | unsigned long arg3) | 116 | unsigned long arg3) |
129 | { | 117 | { |
130 | if (lazy_mode == PARAVIRT_LAZY_NONE) | 118 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
131 | hcall(call, arg1, arg2, arg3); | 119 | hcall(call, arg1, arg2, arg3); |
132 | else | 120 | else |
133 | async_hcall(call, arg1, arg2, arg3); | 121 | async_hcall(call, arg1, arg2, arg3); |
@@ -331,7 +319,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) | |||
331 | } | 319 | } |
332 | 320 | ||
333 | /*G:038 That's enough excitement for now, back to ploughing through each of | 321 | /*G:038 That's enough excitement for now, back to ploughing through each of |
334 | * the paravirt_ops (we're about 1/3 of the way through). | 322 | * the different pv_ops structures (we're about 1/3 of the way through). |
335 | * | 323 | * |
336 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only | 324 | * This is the Local Descriptor Table, another weird Intel thingy. Linux only |
337 | * uses this for some strange applications like Wine. We don't do anything | 325 | * uses this for some strange applications like Wine. We don't do anything |
@@ -558,7 +546,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) | |||
558 | lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); | 546 | lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); |
559 | } | 547 | } |
560 | 548 | ||
561 | /* Unfortunately for Lguest, the paravirt_ops for page tables were based on | 549 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on |
562 | * native page table operations. On native hardware you can set a new page | 550 | * native page table operations. On native hardware you can set a new page |
563 | * table entry whenever you want, but if you want to remove one you have to do | 551 | * table entry whenever you want, but if you want to remove one you have to do |
564 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). | 552 | * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). |
@@ -782,7 +770,7 @@ static void lguest_time_init(void) | |||
782 | clocksource_register(&lguest_clock); | 770 | clocksource_register(&lguest_clock); |
783 | 771 | ||
784 | /* Now we've set up our clock, we can use it as the scheduler clock */ | 772 | /* Now we've set up our clock, we can use it as the scheduler clock */ |
785 | paravirt_ops.sched_clock = lguest_sched_clock; | 773 | pv_time_ops.sched_clock = lguest_sched_clock; |
786 | 774 | ||
787 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 775 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
788 | * here and register our timer device. */ | 776 | * here and register our timer device. */ |
@@ -904,7 +892,7 @@ static __init char *lguest_memory_setup(void) | |||
904 | /*G:050 | 892 | /*G:050 |
905 | * Patching (Powerfully Placating Performance Pedants) | 893 | * Patching (Powerfully Placating Performance Pedants) |
906 | * | 894 | * |
907 | * We have already seen that "struct paravirt_ops" lets us replace simple | 895 | * We have already seen that pv_ops structures let us replace simple |
908 | * native instructions with calls to the appropriate back end all throughout | 896 | * native instructions with calls to the appropriate back end all throughout |
909 | * the kernel. This allows the same kernel to run as a Guest and as a native | 897 | * the kernel. This allows the same kernel to run as a Guest and as a native |
910 | * kernel, but it's slow because of all the indirect branches. | 898 | * kernel, but it's slow because of all the indirect branches. |
@@ -929,10 +917,10 @@ static const struct lguest_insns | |||
929 | { | 917 | { |
930 | const char *start, *end; | 918 | const char *start, *end; |
931 | } lguest_insns[] = { | 919 | } lguest_insns[] = { |
932 | [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, | 920 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, |
933 | [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, | 921 | [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, |
934 | [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, | 922 | [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, |
935 | [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, | 923 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, |
936 | }; | 924 | }; |
937 | 925 | ||
938 | /* Now our patch routine is fairly simple (based on the native one in | 926 | /* Now our patch routine is fairly simple (based on the native one in |
@@ -959,9 +947,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, | |||
959 | return insn_len; | 947 | return insn_len; |
960 | } | 948 | } |
961 | 949 | ||
962 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops | 950 | /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops |
963 | * structure in the kernel provides a single point for (almost) every routine | 951 | * structures in the kernel provide points for (almost) every routine we have |
964 | * we have to override to avoid privileged instructions. */ | 952 | * to override to avoid privileged instructions. */ |
965 | __init void lguest_init(void *boot) | 953 | __init void lguest_init(void *boot) |
966 | { | 954 | { |
967 | /* Copy boot parameters first: the Launcher put the physical location | 955 | /* Copy boot parameters first: the Launcher put the physical location |
@@ -976,54 +964,70 @@ __init void lguest_init(void *boot) | |||
976 | 964 | ||
977 | /* We're under lguest, paravirt is enabled, and we're running at | 965 | /* We're under lguest, paravirt is enabled, and we're running at |
978 | * privilege level 1, not 0 as normal. */ | 966 | * privilege level 1, not 0 as normal. */ |
979 | paravirt_ops.name = "lguest"; | 967 | pv_info.name = "lguest"; |
980 | paravirt_ops.paravirt_enabled = 1; | 968 | pv_info.paravirt_enabled = 1; |
981 | paravirt_ops.kernel_rpl = 1; | 969 | pv_info.kernel_rpl = 1; |
982 | 970 | ||
983 | /* We set up all the lguest overrides for sensitive operations. These | 971 | /* We set up all the lguest overrides for sensitive operations. These |
984 | * are detailed with the operations themselves. */ | 972 | * are detailed with the operations themselves. */ |
985 | paravirt_ops.save_fl = save_fl; | 973 | |
986 | paravirt_ops.restore_fl = restore_fl; | 974 | /* interrupt-related operations */ |
987 | paravirt_ops.irq_disable = irq_disable; | 975 | pv_irq_ops.init_IRQ = lguest_init_IRQ; |
988 | paravirt_ops.irq_enable = irq_enable; | 976 | pv_irq_ops.save_fl = save_fl; |
989 | paravirt_ops.load_gdt = lguest_load_gdt; | 977 | pv_irq_ops.restore_fl = restore_fl; |
990 | paravirt_ops.memory_setup = lguest_memory_setup; | 978 | pv_irq_ops.irq_disable = irq_disable; |
991 | paravirt_ops.cpuid = lguest_cpuid; | 979 | pv_irq_ops.irq_enable = irq_enable; |
992 | paravirt_ops.write_cr3 = lguest_write_cr3; | 980 | pv_irq_ops.safe_halt = lguest_safe_halt; |
993 | paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; | 981 | |
994 | paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; | 982 | /* init-time operations */ |
995 | paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; | 983 | pv_init_ops.memory_setup = lguest_memory_setup; |
996 | paravirt_ops.set_pte = lguest_set_pte; | 984 | pv_init_ops.patch = lguest_patch; |
997 | paravirt_ops.set_pte_at = lguest_set_pte_at; | 985 | |
998 | paravirt_ops.set_pmd = lguest_set_pmd; | 986 | /* Intercepts of various cpu instructions */ |
987 | pv_cpu_ops.load_gdt = lguest_load_gdt; | ||
988 | pv_cpu_ops.cpuid = lguest_cpuid; | ||
989 | pv_cpu_ops.load_idt = lguest_load_idt; | ||
990 | pv_cpu_ops.iret = lguest_iret; | ||
991 | pv_cpu_ops.load_esp0 = lguest_load_esp0; | ||
992 | pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; | ||
993 | pv_cpu_ops.set_ldt = lguest_set_ldt; | ||
994 | pv_cpu_ops.load_tls = lguest_load_tls; | ||
995 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; | ||
996 | pv_cpu_ops.clts = lguest_clts; | ||
997 | pv_cpu_ops.read_cr0 = lguest_read_cr0; | ||
998 | pv_cpu_ops.write_cr0 = lguest_write_cr0; | ||
999 | pv_cpu_ops.read_cr4 = lguest_read_cr4; | ||
1000 | pv_cpu_ops.write_cr4 = lguest_write_cr4; | ||
1001 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; | ||
1002 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; | ||
1003 | pv_cpu_ops.wbinvd = lguest_wbinvd; | ||
1004 | pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; | ||
1005 | pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | ||
1006 | |||
1007 | /* pagetable management */ | ||
1008 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | ||
1009 | pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; | ||
1010 | pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; | ||
1011 | pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; | ||
1012 | pv_mmu_ops.set_pte = lguest_set_pte; | ||
1013 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; | ||
1014 | pv_mmu_ops.set_pmd = lguest_set_pmd; | ||
1015 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | ||
1016 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | ||
1017 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; | ||
1018 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | ||
1019 | |||
999 | #ifdef CONFIG_X86_LOCAL_APIC | 1020 | #ifdef CONFIG_X86_LOCAL_APIC |
1000 | paravirt_ops.apic_write = lguest_apic_write; | 1021 | /* apic read/write intercepts */ |
1001 | paravirt_ops.apic_write_atomic = lguest_apic_write; | 1022 | pv_apic_ops.apic_write = lguest_apic_write; |
1002 | paravirt_ops.apic_read = lguest_apic_read; | 1023 | pv_apic_ops.apic_write_atomic = lguest_apic_write; |
1024 | pv_apic_ops.apic_read = lguest_apic_read; | ||
1003 | #endif | 1025 | #endif |
1004 | paravirt_ops.load_idt = lguest_load_idt; | 1026 | |
1005 | paravirt_ops.iret = lguest_iret; | 1027 | /* time operations */ |
1006 | paravirt_ops.load_esp0 = lguest_load_esp0; | 1028 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
1007 | paravirt_ops.load_tr_desc = lguest_load_tr_desc; | 1029 | pv_time_ops.time_init = lguest_time_init; |
1008 | paravirt_ops.set_ldt = lguest_set_ldt; | 1030 | |
1009 | paravirt_ops.load_tls = lguest_load_tls; | ||
1010 | paravirt_ops.set_debugreg = lguest_set_debugreg; | ||
1011 | paravirt_ops.clts = lguest_clts; | ||
1012 | paravirt_ops.read_cr0 = lguest_read_cr0; | ||
1013 | paravirt_ops.write_cr0 = lguest_write_cr0; | ||
1014 | paravirt_ops.init_IRQ = lguest_init_IRQ; | ||
1015 | paravirt_ops.read_cr2 = lguest_read_cr2; | ||
1016 | paravirt_ops.read_cr3 = lguest_read_cr3; | ||
1017 | paravirt_ops.read_cr4 = lguest_read_cr4; | ||
1018 | paravirt_ops.write_cr4 = lguest_write_cr4; | ||
1019 | paravirt_ops.write_gdt_entry = lguest_write_gdt_entry; | ||
1020 | paravirt_ops.write_idt_entry = lguest_write_idt_entry; | ||
1021 | paravirt_ops.patch = lguest_patch; | ||
1022 | paravirt_ops.safe_halt = lguest_safe_halt; | ||
1023 | paravirt_ops.get_wallclock = lguest_get_wallclock; | ||
1024 | paravirt_ops.time_init = lguest_time_init; | ||
1025 | paravirt_ops.set_lazy_mode = lguest_lazy_mode; | ||
1026 | paravirt_ops.wbinvd = lguest_wbinvd; | ||
1027 | /* Now is a good time to look at the implementations of these functions | 1031 | /* Now is a good time to look at the implementations of these functions |
1028 | * before returning to the rest of lguest_init(). */ | 1032 | * before returning to the rest of lguest_init(). */ |
1029 | 1033 | ||
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c index 9e7752cc8002..57329788f8a7 100644 --- a/drivers/lguest/lguest_bus.c +++ b/drivers/lguest/lguest_bus.c | |||
@@ -201,7 +201,7 @@ static void scan_devices(void) | |||
201 | * "struct lguest_device_desc" array. */ | 201 | * "struct lguest_device_desc" array. */ |
202 | static int __init lguest_bus_init(void) | 202 | static int __init lguest_bus_init(void) |
203 | { | 203 | { |
204 | if (strcmp(paravirt_ops.name, "lguest") != 0) | 204 | if (strcmp(pv_info.name, "lguest") != 0) |
205 | return 0; | 205 | return 0; |
206 | 206 | ||
207 | /* Devices are in a single page above top of "normal" mem */ | 207 | /* Devices are in a single page above top of "normal" mem */ |