aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/kvm
diff options
context:
space:
mode:
authorDan Kenigsberg <danken@qumranet.com>2007-11-21 10:10:04 -0500
committerAvi Kivity <avi@qumranet.com>2008-01-30 10:53:13 -0500
commit0771671749b59a507b6da4efb931c44d9691e248 (patch)
tree8d73e81194c7522ad9c0754201c5199b79e2bb98 /drivers/kvm
parent6d4e4c4fca5be806b888d606894d914847e82d78 (diff)
KVM: Enhance guest cpuid management
The current cpuid management suffers from several problems, which inhibit passing through the host feature set to the guest:

- No way to tell which features the host supports.
  While some features can be supported with no changes to kvm, others need explicit support. That means kvm needs to vet the feature set before it is passed to the guest.

- No support for indexed or stateful cpuid entries.
  Some cpuid entries depend on ecx as well as on eax, or on internal state in the processor (running cpuid multiple times with the same input returns different output). The current cpuid machinery only supports keying on eax.

- No support for save/restore/migrate.
  The internal state above needs to be exposed to userspace so it can be saved or migrated.

This patch adds extended cpuid support by means of three new ioctls:

- KVM_GET_SUPPORTED_CPUID: get all cpuid entries the host (and kvm) supports
- KVM_SET_CPUID2: sets the vcpu's cpuid table
- KVM_GET_CPUID2: gets the vcpu's cpuid table, including hidden state

[avi: fix original KVM_SET_CPUID not removing nx on non-nx hosts as it did before]

Signed-off-by: Dan Kenigsberg <danken@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm')
-rw-r--r--drivers/kvm/x86.c331
-rw-r--r--drivers/kvm/x86.h2
2 files changed, 323 insertions, 10 deletions
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 15e1203faef0..7237cb25f77d 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -646,6 +646,7 @@ int kvm_dev_ioctl_check_extension(long ext)
646 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 646 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
647 case KVM_CAP_USER_MEMORY: 647 case KVM_CAP_USER_MEMORY:
648 case KVM_CAP_SET_TSS_ADDR: 648 case KVM_CAP_SET_TSS_ADDR:
649 case KVM_CAP_EXT_CPUID:
649 r = 1; 650 r = 1;
650 break; 651 break;
651 default: 652 default:
@@ -708,13 +709,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
708 kvm_put_guest_fpu(vcpu); 709 kvm_put_guest_fpu(vcpu);
709} 710}
710 711
711static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) 712static int is_efer_nx(void)
712{ 713{
713 u64 efer; 714 u64 efer;
714 int i;
715 struct kvm_cpuid_entry *e, *entry;
716 715
717 rdmsrl(MSR_EFER, efer); 716 rdmsrl(MSR_EFER, efer);
717 return efer & EFER_NX;
718}
719
720static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
721{
722 int i;
723 struct kvm_cpuid_entry2 *e, *entry;
724
718 entry = NULL; 725 entry = NULL;
719 for (i = 0; i < vcpu->cpuid_nent; ++i) { 726 for (i = 0; i < vcpu->cpuid_nent; ++i) {
720 e = &vcpu->cpuid_entries[i]; 727 e = &vcpu->cpuid_entries[i];
@@ -723,16 +730,57 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
723 break; 730 break;
724 } 731 }
725 } 732 }
726 if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { 733 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
727 entry->edx &= ~(1 << 20); 734 entry->edx &= ~(1 << 20);
728 printk(KERN_INFO "kvm: guest NX capability removed\n"); 735 printk(KERN_INFO "kvm: guest NX capability removed\n");
729 } 736 }
730} 737}
731 738
739/* when an old userspace process fills a new kernel module */
732static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 740static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
733 struct kvm_cpuid *cpuid, 741 struct kvm_cpuid *cpuid,
734 struct kvm_cpuid_entry __user *entries) 742 struct kvm_cpuid_entry __user *entries)
735{ 743{
744 int r, i;
745 struct kvm_cpuid_entry *cpuid_entries;
746
747 r = -E2BIG;
748 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
749 goto out;
750 r = -ENOMEM;
751 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
752 if (!cpuid_entries)
753 goto out;
754 r = -EFAULT;
755 if (copy_from_user(cpuid_entries, entries,
756 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
757 goto out_free;
758 for (i = 0; i < cpuid->nent; i++) {
759 vcpu->cpuid_entries[i].function = cpuid_entries[i].function;
760 vcpu->cpuid_entries[i].eax = cpuid_entries[i].eax;
761 vcpu->cpuid_entries[i].ebx = cpuid_entries[i].ebx;
762 vcpu->cpuid_entries[i].ecx = cpuid_entries[i].ecx;
763 vcpu->cpuid_entries[i].edx = cpuid_entries[i].edx;
764 vcpu->cpuid_entries[i].index = 0;
765 vcpu->cpuid_entries[i].flags = 0;
766 vcpu->cpuid_entries[i].padding[0] = 0;
767 vcpu->cpuid_entries[i].padding[1] = 0;
768 vcpu->cpuid_entries[i].padding[2] = 0;
769 }
770 vcpu->cpuid_nent = cpuid->nent;
771 cpuid_fix_nx_cap(vcpu);
772 r = 0;
773
774out_free:
775 vfree(cpuid_entries);
776out:
777 return r;
778}
779
780static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
781 struct kvm_cpuid2 *cpuid,
782 struct kvm_cpuid_entry2 __user *entries)
783{
736 int r; 784 int r;
737 785
738 r = -E2BIG; 786 r = -E2BIG;
@@ -740,16 +788,198 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
740 goto out; 788 goto out;
741 r = -EFAULT; 789 r = -EFAULT;
742 if (copy_from_user(&vcpu->cpuid_entries, entries, 790 if (copy_from_user(&vcpu->cpuid_entries, entries,
743 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 791 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
744 goto out; 792 goto out;
745 vcpu->cpuid_nent = cpuid->nent; 793 vcpu->cpuid_nent = cpuid->nent;
746 cpuid_fix_nx_cap(vcpu);
747 return 0; 794 return 0;
748 795
749out: 796out:
750 return r; 797 return r;
751} 798}
752 799
800static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
801 struct kvm_cpuid2 *cpuid,
802 struct kvm_cpuid_entry2 __user *entries)
803{
804 int r;
805
806 r = -E2BIG;
807 if (cpuid->nent < vcpu->cpuid_nent)
808 goto out;
809 r = -EFAULT;
810 if (copy_to_user(entries, &vcpu->cpuid_entries,
811 vcpu->cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
812 goto out;
813 return 0;
814
815out:
816 cpuid->nent = vcpu->cpuid_nent;
817 return r;
818}
819
/*
 * Return the mask for feature number @bitno within its 32-bit cpuid word
 * (only the low five bits of @bitno select the bit position).
 *
 * The shift is done on an unsigned constant: shifting a signed 1 into
 * bit 31 is undefined behaviour.
 */
static inline u32 bit(int bitno)
{
	return 1U << (bitno & 31);
}
824
825static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
826 u32 index)
827{
828 entry->function = function;
829 entry->index = index;
830 cpuid_count(entry->function, entry->index,
831 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
832 entry->flags = 0;
833}
834
835static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
836 u32 index, int *nent, int maxnent)
837{
838 const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
839 bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
840 bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
841 bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
842 bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
843 bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
844 bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
845 bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
846 bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
847 bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
848 const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
849 bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
850 bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
851 bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
852 bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
853 bit(X86_FEATURE_PGE) |
854 bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
855 bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
856 bit(X86_FEATURE_SYSCALL) |
857 (bit(X86_FEATURE_NX) && is_efer_nx()) |
858#ifdef CONFIG_X86_64
859 bit(X86_FEATURE_LM) |
860#endif
861 bit(X86_FEATURE_MMXEXT) |
862 bit(X86_FEATURE_3DNOWEXT) |
863 bit(X86_FEATURE_3DNOW);
864 const u32 kvm_supported_word3_x86_features =
865 bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
866 const u32 kvm_supported_word6_x86_features =
867 bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
868
869 /* all func 2 cpuid_count() should be called on the same cpu */
870 get_cpu();
871 do_cpuid_1_ent(entry, function, index);
872 ++*nent;
873
874 switch (function) {
875 case 0:
876 entry->eax = min(entry->eax, (u32)0xb);
877 break;
878 case 1:
879 entry->edx &= kvm_supported_word0_x86_features;
880 entry->ecx &= kvm_supported_word3_x86_features;
881 break;
882 /* function 2 entries are STATEFUL. That is, repeated cpuid commands
883 * may return different values. This forces us to get_cpu() before
884 * issuing the first command, and also to emulate this annoying behavior
885 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
886 case 2: {
887 int t, times = entry->eax & 0xff;
888
889 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
890 for (t = 1; t < times && *nent < maxnent; ++t) {
891 do_cpuid_1_ent(&entry[t], function, 0);
892 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
893 ++*nent;
894 }
895 break;
896 }
897 /* function 4 and 0xb have additional index. */
898 case 4: {
899 int index, cache_type;
900
901 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
902 /* read more entries until cache_type is zero */
903 for (index = 1; *nent < maxnent; ++index) {
904 cache_type = entry[index - 1].eax & 0x1f;
905 if (!cache_type)
906 break;
907 do_cpuid_1_ent(&entry[index], function, index);
908 entry[index].flags |=
909 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
910 ++*nent;
911 }
912 break;
913 }
914 case 0xb: {
915 int index, level_type;
916
917 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
918 /* read more entries until level_type is zero */
919 for (index = 1; *nent < maxnent; ++index) {
920 level_type = entry[index - 1].ecx & 0xff;
921 if (!level_type)
922 break;
923 do_cpuid_1_ent(&entry[index], function, index);
924 entry[index].flags |=
925 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
926 ++*nent;
927 }
928 break;
929 }
930 case 0x80000000:
931 entry->eax = min(entry->eax, 0x8000001a);
932 break;
933 case 0x80000001:
934 entry->edx &= kvm_supported_word1_x86_features;
935 entry->ecx &= kvm_supported_word6_x86_features;
936 break;
937 }
938 put_cpu();
939}
940
941static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
942 struct kvm_cpuid2 *cpuid,
943 struct kvm_cpuid_entry2 __user *entries)
944{
945 struct kvm_cpuid_entry2 *cpuid_entries;
946 int limit, nent = 0, r = -E2BIG;
947 u32 func;
948
949 if (cpuid->nent < 1)
950 goto out;
951 r = -ENOMEM;
952 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
953 if (!cpuid_entries)
954 goto out;
955
956 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
957 limit = cpuid_entries[0].eax;
958 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
959 do_cpuid_ent(&cpuid_entries[nent], func, 0,
960 &nent, cpuid->nent);
961 r = -E2BIG;
962 if (nent >= cpuid->nent)
963 goto out_free;
964
965 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
966 limit = cpuid_entries[nent - 1].eax;
967 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
968 do_cpuid_ent(&cpuid_entries[nent], func, 0,
969 &nent, cpuid->nent);
970 r = -EFAULT;
971 if (copy_to_user(entries, cpuid_entries,
972 nent * sizeof(struct kvm_cpuid_entry2)))
973 goto out_free;
974 cpuid->nent = nent;
975 r = 0;
976
977out_free:
978 vfree(cpuid_entries);
979out:
980 return r;
981}
982
753static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 983static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
754 struct kvm_lapic_state *s) 984 struct kvm_lapic_state *s)
755{ 985{
@@ -816,6 +1046,36 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
816 goto out; 1046 goto out;
817 break; 1047 break;
818 } 1048 }
1049 case KVM_SET_CPUID2: {
1050 struct kvm_cpuid2 __user *cpuid_arg = argp;
1051 struct kvm_cpuid2 cpuid;
1052
1053 r = -EFAULT;
1054 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1055 goto out;
1056 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
1057 cpuid_arg->entries);
1058 if (r)
1059 goto out;
1060 break;
1061 }
1062 case KVM_GET_CPUID2: {
1063 struct kvm_cpuid2 __user *cpuid_arg = argp;
1064 struct kvm_cpuid2 cpuid;
1065
1066 r = -EFAULT;
1067 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1068 goto out;
1069 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
1070 cpuid_arg->entries);
1071 if (r)
1072 goto out;
1073 r = -EFAULT;
1074 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
1075 goto out;
1076 r = 0;
1077 break;
1078 }
819 case KVM_GET_MSRS: 1079 case KVM_GET_MSRS:
820 r = msr_io(vcpu, argp, kvm_get_msr, 1); 1080 r = msr_io(vcpu, argp, kvm_get_msr, 1);
821 break; 1081 break;
@@ -1111,6 +1371,24 @@ long kvm_arch_vm_ioctl(struct file *filp,
1111 r = 0; 1371 r = 0;
1112 break; 1372 break;
1113 } 1373 }
1374 case KVM_GET_SUPPORTED_CPUID: {
1375 struct kvm_cpuid2 __user *cpuid_arg = argp;
1376 struct kvm_cpuid2 cpuid;
1377
1378 r = -EFAULT;
1379 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1380 goto out;
1381 r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
1382 cpuid_arg->entries);
1383 if (r)
1384 goto out;
1385
1386 r = -EFAULT;
1387 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
1388 goto out;
1389 r = 0;
1390 break;
1391 }
1114 default: 1392 default:
1115 ; 1393 ;
1116 } 1394 }
@@ -1908,14 +2186,47 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1908 } 2186 }
1909} 2187}
1910 2188
2189static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
2190{
2191 struct kvm_cpuid_entry2 *e = &vcpu->cpuid_entries[i];
2192 int j, nent = vcpu->cpuid_nent;
2193
2194 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
2195 /* when no next entry is found, the current entry[i] is reselected */
2196 for (j = i + 1; j == i; j = (j + 1) % nent) {
2197 struct kvm_cpuid_entry2 *ej = &vcpu->cpuid_entries[j];
2198 if (ej->function == e->function) {
2199 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
2200 return j;
2201 }
2202 }
2203 return 0; /* silence gcc, even though control never reaches here */
2204}
2205
2206/* find an entry with matching function, matching index (if needed), and that
2207 * should be read next (if it's stateful) */
2208static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
2209 u32 function, u32 index)
2210{
2211 if (e->function != function)
2212 return 0;
2213 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
2214 return 0;
2215 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
2216 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
2217 return 0;
2218 return 1;
2219}
2220
1911void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 2221void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1912{ 2222{
1913 int i; 2223 int i;
1914 u32 function; 2224 u32 function, index;
1915 struct kvm_cpuid_entry *e, *best; 2225 struct kvm_cpuid_entry2 *e, *best;
1916 2226
1917 kvm_x86_ops->cache_regs(vcpu); 2227 kvm_x86_ops->cache_regs(vcpu);
1918 function = vcpu->regs[VCPU_REGS_RAX]; 2228 function = vcpu->regs[VCPU_REGS_RAX];
2229 index = vcpu->regs[VCPU_REGS_RCX];
1919 vcpu->regs[VCPU_REGS_RAX] = 0; 2230 vcpu->regs[VCPU_REGS_RAX] = 0;
1920 vcpu->regs[VCPU_REGS_RBX] = 0; 2231 vcpu->regs[VCPU_REGS_RBX] = 0;
1921 vcpu->regs[VCPU_REGS_RCX] = 0; 2232 vcpu->regs[VCPU_REGS_RCX] = 0;
@@ -1923,7 +2234,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1923 best = NULL; 2234 best = NULL;
1924 for (i = 0; i < vcpu->cpuid_nent; ++i) { 2235 for (i = 0; i < vcpu->cpuid_nent; ++i) {
1925 e = &vcpu->cpuid_entries[i]; 2236 e = &vcpu->cpuid_entries[i];
1926 if (e->function == function) { 2237 if (is_matching_cpuid_entry(e, function, index)) {
2238 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
2239 move_to_next_stateful_cpuid_entry(vcpu, i);
1927 best = e; 2240 best = e;
1928 break; 2241 break;
1929 } 2242 }
diff --git a/drivers/kvm/x86.h b/drivers/kvm/x86.h
index b1528c9f566f..78ab1e108d8b 100644
--- a/drivers/kvm/x86.h
+++ b/drivers/kvm/x86.h
@@ -149,7 +149,7 @@ struct kvm_vcpu {
149 int halt_request; /* real mode on Intel only */ 149 int halt_request; /* real mode on Intel only */
150 150
151 int cpuid_nent; 151 int cpuid_nent;
152 struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; 152 struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
153 153
154 /* emulate context */ 154 /* emulate context */
155 155