Diffstat (limited to 'arch/x86')
47 files changed, 2137 insertions, 532 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5c0ed72c02a2..30c40f08a3d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2032,7 +2032,6 @@ menu "Bus options (PCI etc.)"
2032 | config PCI | 2032 | config PCI |
2033 | bool "PCI support" | 2033 | bool "PCI support" |
2034 | default y | 2034 | default y |
2035 | select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) | ||
2036 | ---help--- | 2035 | ---help--- |
2037 | Find out whether you have a PCI motherboard. PCI is the name of a | 2036 | Find out whether you have a PCI motherboard. PCI is the name of a |
2038 | bus system, i.e. the way the CPU talks to the other stuff inside | 2037 | bus system, i.e. the way the CPU talks to the other stuff inside |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 6c63c358a7e6..7d6ba9db1be9 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
27 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o | 27 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o |
28 | obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o | 28 | obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o |
29 | obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o | 29 | obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o |
30 | obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o | ||
30 | 31 | ||
31 | # These modules require assembler to support AVX. | 32 | # These modules require assembler to support AVX. |
32 | ifeq ($(avx_supported),yes) | 33 | ifeq ($(avx_supported),yes) |
@@ -81,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
81 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o | 82 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o |
82 | sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o | 83 | sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o |
83 | sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o | 84 | sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o |
85 | crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o | ||
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 5cb86ccd4acb..c171dcbf192d 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -62,7 +62,7 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
62 | } | 62 | } |
63 | 63 | ||
64 | /* camellia sboxes */ | 64 | /* camellia sboxes */ |
65 | const u64 camellia_sp10011110[256] = { | 65 | __visible const u64 camellia_sp10011110[256] = { |
66 | 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, | 66 | 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, |
67 | 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, | 67 | 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, |
68 | 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, | 68 | 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, |
@@ -151,7 +151,7 @@ const u64 camellia_sp10011110[256] = {
151 | 0x9e00009e9e9e9e00ULL, | 151 | 0x9e00009e9e9e9e00ULL, |
152 | }; | 152 | }; |
153 | 153 | ||
154 | const u64 camellia_sp22000222[256] = { | 154 | __visible const u64 camellia_sp22000222[256] = { |
155 | 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, | 155 | 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, |
156 | 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, | 156 | 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, |
157 | 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, | 157 | 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, |
@@ -240,7 +240,7 @@ const u64 camellia_sp22000222[256] = {
240 | 0x3d3d0000003d3d3dULL, | 240 | 0x3d3d0000003d3d3dULL, |
241 | }; | 241 | }; |
242 | 242 | ||
243 | const u64 camellia_sp03303033[256] = { | 243 | __visible const u64 camellia_sp03303033[256] = { |
244 | 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, | 244 | 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, |
245 | 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, | 245 | 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, |
246 | 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, | 246 | 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, |
@@ -329,7 +329,7 @@ const u64 camellia_sp03303033[256] = {
329 | 0x004f4f004f004f4fULL, | 329 | 0x004f4f004f004f4fULL, |
330 | }; | 330 | }; |
331 | 331 | ||
332 | const u64 camellia_sp00444404[256] = { | 332 | __visible const u64 camellia_sp00444404[256] = { |
333 | 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, | 333 | 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, |
334 | 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, | 334 | 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, |
335 | 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, | 335 | 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, |
@@ -418,7 +418,7 @@ const u64 camellia_sp00444404[256] = {
418 | 0x00009e9e9e9e009eULL, | 418 | 0x00009e9e9e9e009eULL, |
419 | }; | 419 | }; |
420 | 420 | ||
421 | const u64 camellia_sp02220222[256] = { | 421 | __visible const u64 camellia_sp02220222[256] = { |
422 | 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, | 422 | 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, |
423 | 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, | 423 | 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, |
424 | 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, | 424 | 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, |
@@ -507,7 +507,7 @@ const u64 camellia_sp02220222[256] = {
507 | 0x003d3d3d003d3d3dULL, | 507 | 0x003d3d3d003d3d3dULL, |
508 | }; | 508 | }; |
509 | 509 | ||
510 | const u64 camellia_sp30333033[256] = { | 510 | __visible const u64 camellia_sp30333033[256] = { |
511 | 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, | 511 | 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, |
512 | 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, | 512 | 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, |
513 | 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, | 513 | 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, |
@@ -596,7 +596,7 @@ const u64 camellia_sp30333033[256] = {
596 | 0x4f004f4f4f004f4fULL, | 596 | 0x4f004f4f4f004f4fULL, |
597 | }; | 597 | }; |
598 | 598 | ||
599 | const u64 camellia_sp44044404[256] = { | 599 | __visible const u64 camellia_sp44044404[256] = { |
600 | 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, | 600 | 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, |
601 | 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, | 601 | 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, |
602 | 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, | 602 | 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, |
@@ -685,7 +685,7 @@ const u64 camellia_sp44044404[256] = {
685 | 0x9e9e009e9e9e009eULL, | 685 | 0x9e9e009e9e9e009eULL, |
686 | }; | 686 | }; |
687 | 687 | ||
688 | const u64 camellia_sp11101110[256] = { | 688 | __visible const u64 camellia_sp11101110[256] = { |
689 | 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, | 689 | 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, |
690 | 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, | 690 | 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, |
691 | 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, | 691 | 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, |
@@ -828,8 +828,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
828 | 828 | ||
829 | subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; | 829 | subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; |
830 | /* modified for FLinv(kl2) */ | 830 | /* modified for FLinv(kl2) */ |
831 | dw = (subRL[1] & subRL[9]) >> 32, | 831 | dw = (subRL[1] & subRL[9]) >> 32; |
832 | subRL[1] ^= rol32(dw, 1); | 832 | subRL[1] ^= rol32(dw, 1); |
833 | 833 | ||
834 | /* round 8 */ | 834 | /* round 8 */ |
835 | subRL[11] ^= subRL[1]; | 835 | subRL[11] ^= subRL[1]; |
@@ -840,8 +840,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
840 | 840 | ||
841 | subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; | 841 | subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; |
842 | /* modified for FLinv(kl4) */ | 842 | /* modified for FLinv(kl4) */ |
843 | dw = (subRL[1] & subRL[17]) >> 32, | 843 | dw = (subRL[1] & subRL[17]) >> 32; |
844 | subRL[1] ^= rol32(dw, 1); | 844 | subRL[1] ^= rol32(dw, 1); |
845 | 845 | ||
846 | /* round 14 */ | 846 | /* round 14 */ |
847 | subRL[19] ^= subRL[1]; | 847 | subRL[19] ^= subRL[1]; |
@@ -859,8 +859,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
859 | } else { | 859 | } else { |
860 | subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; | 860 | subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; |
861 | /* modified for FLinv(kl6) */ | 861 | /* modified for FLinv(kl6) */ |
862 | dw = (subRL[1] & subRL[25]) >> 32, | 862 | dw = (subRL[1] & subRL[25]) >> 32; |
863 | subRL[1] ^= rol32(dw, 1); | 863 | subRL[1] ^= rol32(dw, 1); |
864 | 864 | ||
865 | /* round 20 */ | 865 | /* round 20 */ |
866 | subRL[27] ^= subRL[1]; | 866 | subRL[27] ^= subRL[1]; |
@@ -882,8 +882,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
882 | 882 | ||
883 | kw4 ^= (kw4 & ~subRL[24]) << 32; | 883 | kw4 ^= (kw4 & ~subRL[24]) << 32; |
884 | /* modified for FL(kl5) */ | 884 | /* modified for FL(kl5) */ |
885 | dw = (kw4 & subRL[24]) >> 32, | 885 | dw = (kw4 & subRL[24]) >> 32; |
886 | kw4 ^= rol32(dw, 1); | 886 | kw4 ^= rol32(dw, 1); |
887 | } | 887 | } |
888 | 888 | ||
889 | /* round 17 */ | 889 | /* round 17 */ |
@@ -895,8 +895,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
895 | 895 | ||
896 | kw4 ^= (kw4 & ~subRL[16]) << 32; | 896 | kw4 ^= (kw4 & ~subRL[16]) << 32; |
897 | /* modified for FL(kl3) */ | 897 | /* modified for FL(kl3) */ |
898 | dw = (kw4 & subRL[16]) >> 32, | 898 | dw = (kw4 & subRL[16]) >> 32; |
899 | kw4 ^= rol32(dw, 1); | 899 | kw4 ^= rol32(dw, 1); |
900 | 900 | ||
901 | /* round 11 */ | 901 | /* round 11 */ |
902 | subRL[14] ^= kw4; | 902 | subRL[14] ^= kw4; |
@@ -907,8 +907,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
907 | 907 | ||
908 | kw4 ^= (kw4 & ~subRL[8]) << 32; | 908 | kw4 ^= (kw4 & ~subRL[8]) << 32; |
909 | /* modified for FL(kl1) */ | 909 | /* modified for FL(kl1) */ |
910 | dw = (kw4 & subRL[8]) >> 32, | 910 | dw = (kw4 & subRL[8]) >> 32; |
911 | kw4 ^= rol32(dw, 1); | 911 | kw4 ^= rol32(dw, 1); |
912 | 912 | ||
913 | /* round 5 */ | 913 | /* round 5 */ |
914 | subRL[6] ^= kw4; | 914 | subRL[6] ^= kw4; |
@@ -928,8 +928,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
928 | SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ | 928 | SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ |
929 | 929 | ||
930 | tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); | 930 | tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); |
931 | dw = tl & (subRL[8] >> 32), /* FL(kl1) */ | 931 | dw = tl & (subRL[8] >> 32); /* FL(kl1) */ |
932 | tr = subRL[10] ^ rol32(dw, 1); | 932 | tr = subRL[10] ^ rol32(dw, 1); |
933 | tt = (tr | ((u64)tl << 32)); | 933 | tt = (tr | ((u64)tl << 32)); |
934 | 934 | ||
935 | SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ | 935 | SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ |
@@ -937,8 +937,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
937 | SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ | 937 | SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ |
938 | 938 | ||
939 | tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); | 939 | tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); |
940 | dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */ | 940 | dw = tl & (subRL[9] >> 32); /* FLinv(kl2) */ |
941 | tr = subRL[7] ^ rol32(dw, 1); | 941 | tr = subRL[7] ^ rol32(dw, 1); |
942 | tt = (tr | ((u64)tl << 32)); | 942 | tt = (tr | ((u64)tl << 32)); |
943 | 943 | ||
944 | SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ | 944 | SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ |
@@ -948,8 +948,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
948 | SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ | 948 | SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ |
949 | 949 | ||
950 | tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); | 950 | tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); |
951 | dw = tl & (subRL[16] >> 32), /* FL(kl3) */ | 951 | dw = tl & (subRL[16] >> 32); /* FL(kl3) */ |
952 | tr = subRL[18] ^ rol32(dw, 1); | 952 | tr = subRL[18] ^ rol32(dw, 1); |
953 | tt = (tr | ((u64)tl << 32)); | 953 | tt = (tr | ((u64)tl << 32)); |
954 | 954 | ||
955 | SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ | 955 | SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ |
@@ -957,8 +957,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
957 | SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ | 957 | SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ |
958 | 958 | ||
959 | tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); | 959 | tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); |
960 | dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */ | 960 | dw = tl & (subRL[17] >> 32); /* FLinv(kl4) */ |
961 | tr = subRL[15] ^ rol32(dw, 1); | 961 | tr = subRL[15] ^ rol32(dw, 1); |
962 | tt = (tr | ((u64)tl << 32)); | 962 | tt = (tr | ((u64)tl << 32)); |
963 | 963 | ||
964 | SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ | 964 | SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ |
@@ -972,8 +972,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
972 | SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ | 972 | SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ |
973 | } else { | 973 | } else { |
974 | tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); | 974 | tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); |
975 | dw = tl & (subRL[24] >> 32), /* FL(kl5) */ | 975 | dw = tl & (subRL[24] >> 32); /* FL(kl5) */ |
976 | tr = subRL[26] ^ rol32(dw, 1); | 976 | tr = subRL[26] ^ rol32(dw, 1); |
977 | tt = (tr | ((u64)tl << 32)); | 977 | tt = (tr | ((u64)tl << 32)); |
978 | 978 | ||
979 | SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ | 979 | SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ |
@@ -981,8 +981,8 @@ static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
981 | SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ | 981 | SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ |
982 | 982 | ||
983 | tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); | 983 | tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); |
984 | dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */ | 984 | dw = tl & (subRL[25] >> 32); /* FLinv(kl6) */ |
985 | tr = subRL[23] ^ rol32(dw, 1); | 985 | tr = subRL[23] ^ rol32(dw, 1); |
986 | tt = (tr | ((u64)tl << 32)); | 986 | tt = (tr | ((u64)tl << 32)); |
987 | 987 | ||
988 | SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ | 988 | SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ |
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
new file mode 100644
index 000000000000..35e97569d05f
--- /dev/null
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -0,0 +1,643 @@
1 | ######################################################################## | ||
2 | # Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions | ||
3 | # | ||
4 | # Copyright (c) 2013, Intel Corporation | ||
5 | # | ||
6 | # Authors: | ||
7 | # Erdinc Ozturk <erdinc.ozturk@intel.com> | ||
8 | # Vinodh Gopal <vinodh.gopal@intel.com> | ||
9 | # James Guilford <james.guilford@intel.com> | ||
10 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
11 | # | ||
12 | # This software is available to you under a choice of one of two | ||
13 | # licenses. You may choose to be licensed under the terms of the GNU | ||
14 | # General Public License (GPL) Version 2, available from the file | ||
15 | # COPYING in the main directory of this source tree, or the | ||
16 | # OpenIB.org BSD license below: | ||
17 | # | ||
18 | # Redistribution and use in source and binary forms, with or without | ||
19 | # modification, are permitted provided that the following conditions are | ||
20 | # met: | ||
21 | # | ||
22 | # * Redistributions of source code must retain the above copyright | ||
23 | # notice, this list of conditions and the following disclaimer. | ||
24 | # | ||
25 | # * Redistributions in binary form must reproduce the above copyright | ||
26 | # notice, this list of conditions and the following disclaimer in the | ||
27 | # documentation and/or other materials provided with the | ||
28 | # distribution. | ||
29 | # | ||
30 | # * Neither the name of the Intel Corporation nor the names of its | ||
31 | # contributors may be used to endorse or promote products derived from | ||
32 | # this software without specific prior written permission. | ||
33 | # | ||
34 | # | ||
35 | # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY | ||
36 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
37 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
38 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR | ||
39 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
40 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
41 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
42 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
43 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
44 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
45 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
46 | ######################################################################## | ||
47 | # Function API: | ||
48 | # UINT16 crc_t10dif_pcl( | ||
49 | # UINT16 init_crc, //initial CRC value, 16 bits | ||
50 | # const unsigned char *buf, //buffer pointer to calculate CRC on | ||
51 | # UINT64 len //buffer length in bytes (64-bit data) | ||
52 | # ); | ||
53 | # | ||
54 | # Reference paper titled "Fast CRC Computation for Generic | ||
55 | # Polynomials Using PCLMULQDQ Instruction" | ||
56 | # URL: http://www.intel.com/content/dam/www/public/us/en/documents | ||
57 | # /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | ||
58 | # | ||
59 | # | ||
60 | |||
61 | #include <linux/linkage.h> | ||
62 | |||
63 | .text | ||
64 | |||
65 | #define arg1 %rdi | ||
66 | #define arg2 %rsi | ||
67 | #define arg3 %rdx | ||
68 | |||
69 | #define arg1_low32 %edi | ||
70 | |||
71 | ENTRY(crc_t10dif_pcl) | ||
72 | .align 16 | ||
73 | |||
74 | # adjust the 16-bit initial_crc value, scale it to 32 bits | ||
75 | shl $16, arg1_low32 | ||
76 | |||
77 | # Allocate Stack Space | ||
78 | mov %rsp, %rcx | ||
79 | sub $16*2, %rsp | ||
80 | # align stack to 16 byte boundary | ||
81 | and $~(0x10 - 1), %rsp | ||
82 | |||
83 | # check if smaller than 256 | ||
84 | cmp $256, arg3 | ||
85 | |||
86 | # for sizes less than 128, we can't fold 64B at a time... | ||
87 | jl _less_than_128 | ||
88 | |||
89 | |||
90 | # load the initial crc value | ||
91 | movd arg1_low32, %xmm10 # initial crc | ||
92 | |||
93 | # crc value does not need to be byte-reflected, but it needs | ||
94 | # to be moved to the high part of the register. | ||
95 | # because data will be byte-reflected and will align with | ||
96 | # initial crc at correct place. | ||
97 | pslldq $12, %xmm10 | ||
98 | |||
99 | movdqa SHUF_MASK(%rip), %xmm11 | ||
100 | # receive the initial 64B data, xor the initial crc value | ||
101 | movdqu 16*0(arg2), %xmm0 | ||
102 | movdqu 16*1(arg2), %xmm1 | ||
103 | movdqu 16*2(arg2), %xmm2 | ||
104 | movdqu 16*3(arg2), %xmm3 | ||
105 | movdqu 16*4(arg2), %xmm4 | ||
106 | movdqu 16*5(arg2), %xmm5 | ||
107 | movdqu 16*6(arg2), %xmm6 | ||
108 | movdqu 16*7(arg2), %xmm7 | ||
109 | |||
110 | pshufb %xmm11, %xmm0 | ||
111 | # XOR the initial_crc value | ||
112 | pxor %xmm10, %xmm0 | ||
113 | pshufb %xmm11, %xmm1 | ||
114 | pshufb %xmm11, %xmm2 | ||
115 | pshufb %xmm11, %xmm3 | ||
116 | pshufb %xmm11, %xmm4 | ||
117 | pshufb %xmm11, %xmm5 | ||
118 | pshufb %xmm11, %xmm6 | ||
119 | pshufb %xmm11, %xmm7 | ||
120 | |||
121 | movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 | ||
122 | #imm value of pclmulqdq instruction | ||
123 | #will determine which constant to use | ||
124 | |||
125 | ################################################################# | ||
126 | # we subtract 256 instead of 128 to save one instruction from the loop | ||
127 | sub $256, arg3 | ||
128 | |||
129 | # at this section of the code, there is 64*x+y (0<=y<64) bytes of | ||
130 | # buffer. The _fold_64_B_loop will fold 64B at a time | ||
131 | # until we have 64+y Bytes of buffer | ||
132 | |||
133 | |||
134 | # fold 64B at a time. This section of the code folds 4 xmm | ||
135 | # registers in parallel | ||
136 | _fold_64_B_loop: | ||
137 | |||
138 | # update the buffer pointer | ||
139 | add $128, arg2 # buf += 64# | ||
140 | |||
141 | movdqu 16*0(arg2), %xmm9 | ||
142 | movdqu 16*1(arg2), %xmm12 | ||
143 | pshufb %xmm11, %xmm9 | ||
144 | pshufb %xmm11, %xmm12 | ||
145 | movdqa %xmm0, %xmm8 | ||
146 | movdqa %xmm1, %xmm13 | ||
147 | pclmulqdq $0x0 , %xmm10, %xmm0 | ||
148 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
149 | pclmulqdq $0x0 , %xmm10, %xmm1 | ||
150 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
151 | pxor %xmm9 , %xmm0 | ||
152 | xorps %xmm8 , %xmm0 | ||
153 | pxor %xmm12, %xmm1 | ||
154 | xorps %xmm13, %xmm1 | ||
155 | |||
156 | movdqu 16*2(arg2), %xmm9 | ||
157 | movdqu 16*3(arg2), %xmm12 | ||
158 | pshufb %xmm11, %xmm9 | ||
159 | pshufb %xmm11, %xmm12 | ||
160 | movdqa %xmm2, %xmm8 | ||
161 | movdqa %xmm3, %xmm13 | ||
162 | pclmulqdq $0x0, %xmm10, %xmm2 | ||
163 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
164 | pclmulqdq $0x0, %xmm10, %xmm3 | ||
165 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
166 | pxor %xmm9 , %xmm2 | ||
167 | xorps %xmm8 , %xmm2 | ||
168 | pxor %xmm12, %xmm3 | ||
169 | xorps %xmm13, %xmm3 | ||
170 | |||
171 | movdqu 16*4(arg2), %xmm9 | ||
172 | movdqu 16*5(arg2), %xmm12 | ||
173 | pshufb %xmm11, %xmm9 | ||
174 | pshufb %xmm11, %xmm12 | ||
175 | movdqa %xmm4, %xmm8 | ||
176 | movdqa %xmm5, %xmm13 | ||
177 | pclmulqdq $0x0, %xmm10, %xmm4 | ||
178 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
179 | pclmulqdq $0x0, %xmm10, %xmm5 | ||
180 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
181 | pxor %xmm9 , %xmm4 | ||
182 | xorps %xmm8 , %xmm4 | ||
183 | pxor %xmm12, %xmm5 | ||
184 | xorps %xmm13, %xmm5 | ||
185 | |||
186 | movdqu 16*6(arg2), %xmm9 | ||
187 | movdqu 16*7(arg2), %xmm12 | ||
188 | pshufb %xmm11, %xmm9 | ||
189 | pshufb %xmm11, %xmm12 | ||
190 | movdqa %xmm6 , %xmm8 | ||
191 | movdqa %xmm7 , %xmm13 | ||
192 | pclmulqdq $0x0 , %xmm10, %xmm6 | ||
193 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
194 | pclmulqdq $0x0 , %xmm10, %xmm7 | ||
195 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
196 | pxor %xmm9 , %xmm6 | ||
197 | xorps %xmm8 , %xmm6 | ||
198 | pxor %xmm12, %xmm7 | ||
199 | xorps %xmm13, %xmm7 | ||
200 | |||
201 | sub $128, arg3 | ||
202 | |||
203 | # check if there is another 64B in the buffer to be able to fold | ||
204 | jge _fold_64_B_loop | ||
205 | ################################################################## | ||
206 | |||
207 | |||
208 | add $128, arg2 | ||
209 | # at this point, the buffer pointer is pointing at the last y Bytes | ||
210 | # of the buffer the 64B of folded data is in 4 of the xmm | ||
211 | # registers: xmm0, xmm1, xmm2, xmm3 | ||
212 | |||
213 | |||
214 | # fold the 8 xmm registers to 1 xmm register with different constants | ||
215 | |||
216 | movdqa rk9(%rip), %xmm10 | ||
217 | movdqa %xmm0, %xmm8 | ||
218 | pclmulqdq $0x11, %xmm10, %xmm0 | ||
219 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
220 | pxor %xmm8, %xmm7 | ||
221 | xorps %xmm0, %xmm7 | ||
222 | |||
223 | movdqa rk11(%rip), %xmm10 | ||
224 | movdqa %xmm1, %xmm8 | ||
225 | pclmulqdq $0x11, %xmm10, %xmm1 | ||
226 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
227 | pxor %xmm8, %xmm7 | ||
228 | xorps %xmm1, %xmm7 | ||
229 | |||
230 | movdqa rk13(%rip), %xmm10 | ||
231 | movdqa %xmm2, %xmm8 | ||
232 | pclmulqdq $0x11, %xmm10, %xmm2 | ||
233 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
234 | pxor %xmm8, %xmm7 | ||
235 | pxor %xmm2, %xmm7 | ||
236 | |||
237 | movdqa rk15(%rip), %xmm10 | ||
238 | movdqa %xmm3, %xmm8 | ||
239 | pclmulqdq $0x11, %xmm10, %xmm3 | ||
240 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
241 | pxor %xmm8, %xmm7 | ||
242 | xorps %xmm3, %xmm7 | ||
243 | |||
244 | movdqa rk17(%rip), %xmm10 | ||
245 | movdqa %xmm4, %xmm8 | ||
246 | pclmulqdq $0x11, %xmm10, %xmm4 | ||
247 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
248 | pxor %xmm8, %xmm7 | ||
249 | pxor %xmm4, %xmm7 | ||
250 | |||
251 | movdqa rk19(%rip), %xmm10 | ||
252 | movdqa %xmm5, %xmm8 | ||
253 | pclmulqdq $0x11, %xmm10, %xmm5 | ||
254 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
255 | pxor %xmm8, %xmm7 | ||
256 | xorps %xmm5, %xmm7 | ||
257 | |||
258 | movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 | ||
259 | #imm value of pclmulqdq instruction | ||
260 | #will determine which constant to use | ||
261 | movdqa %xmm6, %xmm8 | ||
262 | pclmulqdq $0x11, %xmm10, %xmm6 | ||
263 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
264 | pxor %xmm8, %xmm7 | ||
265 | pxor %xmm6, %xmm7 | ||
266 | |||
267 | |||
268 | # instead of 64, we add 48 to the loop counter to save 1 instruction | ||
269 | # from the loop instead of a cmp instruction, we use the negative | ||
270 | # flag with the jl instruction | ||
271 | add $128-16, arg3 | ||
272 | jl _final_reduction_for_128 | ||
273 | |||
274 | # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 | ||
275 | # and the rest is in memory. We can fold 16 bytes at a time if y>=16 | ||
276 | # continue folding 16B at a time | ||
277 | |||
278 | _16B_reduction_loop: | ||
279 | movdqa %xmm7, %xmm8 | ||
280 | pclmulqdq $0x11, %xmm10, %xmm7 | ||
281 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
282 | pxor %xmm8, %xmm7 | ||
283 | movdqu (arg2), %xmm0 | ||
284 | pshufb %xmm11, %xmm0 | ||
285 | pxor %xmm0 , %xmm7 | ||
286 | add $16, arg2 | ||
287 | sub $16, arg3 | ||
288 | # instead of a cmp instruction, we utilize the flags with the | ||
289 | # jge instruction equivalent of: cmp arg3, 16-16 | ||
290 | # check if there is any more 16B in the buffer to be able to fold | ||
291 | jge _16B_reduction_loop | ||
292 | |||
293 | #now we have 16+z bytes left to reduce, where 0<= z < 16. | ||
294 | #first, we reduce the data in the xmm7 register | ||
295 | |||
296 | |||
297 | _final_reduction_for_128: | ||
298 | # check if any more data to fold. If not, compute the CRC of | ||
299 | # the final 128 bits | ||
300 | add $16, arg3 | ||
301 | je _128_done | ||
302 | |||
303 | # here we are getting data that is less than 16 bytes. | ||
304 | # since we know that there was data before the pointer, we can | ||
305 | # offset the input pointer before the actual point, to receive | ||
306 | # exactly 16 bytes. after that the registers need to be adjusted. | ||
307 | _get_last_two_xmms: | ||
308 | movdqa %xmm7, %xmm2 | ||
309 | |||
310 | movdqu -16(arg2, arg3), %xmm1 | ||
311 | pshufb %xmm11, %xmm1 | ||
312 | |||
313 | # get rid of the extra data that was loaded before | ||
314 | # load the shift constant | ||
315 | lea pshufb_shf_table+16(%rip), %rax | ||
316 | sub arg3, %rax | ||
317 | movdqu (%rax), %xmm0 | ||
318 | |||
319 | # shift xmm2 to the left by arg3 bytes | ||
320 | pshufb %xmm0, %xmm2 | ||
321 | |||
322 | # shift xmm7 to the right by 16-arg3 bytes | ||
323 | pxor mask1(%rip), %xmm0 | ||
324 | pshufb %xmm0, %xmm7 | ||
325 | pblendvb %xmm2, %xmm1 #xmm0 is implicit | ||
326 | |||
327 | # fold 16 Bytes | ||
328 | movdqa %xmm1, %xmm2 | ||
329 | movdqa %xmm7, %xmm8 | ||
330 | pclmulqdq $0x11, %xmm10, %xmm7 | ||
331 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
332 | pxor %xmm8, %xmm7 | ||
333 | pxor %xmm2, %xmm7 | ||
334 | |||
335 | _128_done: | ||
336 | # compute crc of a 128-bit value | ||
337 | movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 | ||
338 | movdqa %xmm7, %xmm0 | ||
339 | |||
340 | #64b fold | ||
341 | pclmulqdq $0x1, %xmm10, %xmm7 | ||
342 | pslldq $8 , %xmm0 | ||
343 | pxor %xmm0, %xmm7 | ||
344 | |||
345 | #32b fold | ||
346 | movdqa %xmm7, %xmm0 | ||
347 | |||
348 | pand mask2(%rip), %xmm0 | ||
349 | |||
350 | psrldq $12, %xmm7 | ||
351 | pclmulqdq $0x10, %xmm10, %xmm7 | ||
352 | pxor %xmm0, %xmm7 | ||
353 | |||
354 | #barrett reduction | ||
355 | _barrett: | ||
356 | movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 | ||
357 | movdqa %xmm7, %xmm0 | ||
358 | pclmulqdq $0x01, %xmm10, %xmm7 | ||
359 | pslldq $4, %xmm7 | ||
360 | pclmulqdq $0x11, %xmm10, %xmm7 | ||
361 | |||
362 | pslldq $4, %xmm7 | ||
363 | pxor %xmm0, %xmm7 | ||
364 | pextrd $1, %xmm7, %eax | ||
365 | |||
366 | _cleanup: | ||
367 | # scale the result back to 16 bits | ||
368 | shr $16, %eax | ||
369 | mov %rcx, %rsp | ||
370 | ret | ||
371 | |||
372 | ######################################################################## | ||
373 | |||
374 | .align 16 | ||
375 | _less_than_128: | ||
376 | |||
377 | # check if there is enough buffer to be able to fold 16B at a time | ||
378 | cmp $32, arg3 | ||
379 | jl _less_than_32 | ||
380 | movdqa SHUF_MASK(%rip), %xmm11 | ||
381 | |||
382 | # now if there is, load the constants | ||
383 | movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 | ||
384 | |||
385 | movd arg1_low32, %xmm0 # get the initial crc value | ||
386 | pslldq $12, %xmm0 # align it to its correct place | ||
387 | movdqu (arg2), %xmm7 # load the plaintext | ||
388 | pshufb %xmm11, %xmm7 # byte-reflect the plaintext | ||
389 | pxor %xmm0, %xmm7 | ||
390 | |||
391 | |||
392 | # update the buffer pointer | ||
393 | add $16, arg2 | ||
394 | |||
395 | # update the counter. subtract 32 instead of 16 to save one | ||
396 | # instruction from the loop | ||
397 | sub $32, arg3 | ||
398 | |||
399 | jmp _16B_reduction_loop | ||
400 | |||
401 | |||
402 | .align 16 | ||
403 | _less_than_32: | ||
404 | # mov initial crc to the return value. this is necessary for | ||
405 | # zero-length buffers. | ||
406 | mov arg1_low32, %eax | ||
407 | test arg3, arg3 | ||
408 | je _cleanup | ||
409 | |||
410 | movdqa SHUF_MASK(%rip), %xmm11 | ||
411 | |||
412 | movd arg1_low32, %xmm0 # get the initial crc value | ||
413 | pslldq $12, %xmm0 # align it to its correct place | ||
414 | |||
415 | cmp $16, arg3 | ||
416 | je _exact_16_left | ||
417 | jl _less_than_16_left | ||
418 | |||
419 | movdqu (arg2), %xmm7 # load the plaintext | ||
420 | pshufb %xmm11, %xmm7 # byte-reflect the plaintext | ||
421 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
422 | add $16, arg2 | ||
423 | sub $16, arg3 | ||
424 | movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 | ||
425 | jmp _get_last_two_xmms | ||
426 | |||
427 | |||
428 | .align 16 | ||
429 | _less_than_16_left: | ||
430 | # use stack space to load data less than 16 bytes, zero-out | ||
431 | # the 16B in memory first. | ||
432 | |||
433 | pxor %xmm1, %xmm1 | ||
434 | mov %rsp, %r11 | ||
435 | movdqa %xmm1, (%r11) | ||
436 | |||
437 | cmp $4, arg3 | ||
438 | jl _only_less_than_4 | ||
439 | |||
440 | # backup the counter value | ||
441 | mov arg3, %r9 | ||
442 | cmp $8, arg3 | ||
443 | jl _less_than_8_left | ||
444 | |||
445 | # load 8 Bytes | ||
446 | mov (arg2), %rax | ||
447 | mov %rax, (%r11) | ||
448 | add $8, %r11 | ||
449 | sub $8, arg3 | ||
450 | add $8, arg2 | ||
451 | _less_than_8_left: | ||
452 | |||
453 | cmp $4, arg3 | ||
454 | jl _less_than_4_left | ||
455 | |||
456 | # load 4 Bytes | ||
457 | mov (arg2), %eax | ||
458 | mov %eax, (%r11) | ||
459 | add $4, %r11 | ||
460 | sub $4, arg3 | ||
461 | add $4, arg2 | ||
462 | _less_than_4_left: | ||
463 | |||
464 | cmp $2, arg3 | ||
465 | jl _less_than_2_left | ||
466 | |||
467 | # load 2 Bytes | ||
468 | mov (arg2), %ax | ||
469 | mov %ax, (%r11) | ||
470 | add $2, %r11 | ||
471 | sub $2, arg3 | ||
472 | add $2, arg2 | ||
473 | _less_than_2_left: | ||
474 | cmp $1, arg3 | ||
475 | jl _zero_left | ||
476 | |||
477 | # load 1 Byte | ||
478 | mov (arg2), %al | ||
479 | mov %al, (%r11) | ||
480 | _zero_left: | ||
481 | movdqa (%rsp), %xmm7 | ||
482 | pshufb %xmm11, %xmm7 | ||
483 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
484 | |||
485 | # shl r9, 4 | ||
486 | lea pshufb_shf_table+16(%rip), %rax | ||
487 | sub %r9, %rax | ||
488 | movdqu (%rax), %xmm0 | ||
489 | pxor mask1(%rip), %xmm0 | ||
490 | |||
491 | pshufb %xmm0, %xmm7 | ||
492 | jmp _128_done | ||
493 | |||
494 | .align 16 | ||
495 | _exact_16_left: | ||
496 | movdqu (arg2), %xmm7 | ||
497 | pshufb %xmm11, %xmm7 | ||
498 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
499 | |||
500 | jmp _128_done | ||
501 | |||
502 | _only_less_than_4: | ||
503 | cmp $3, arg3 | ||
504 | jl _only_less_than_3 | ||
505 | |||
506 | # load 3 Bytes | ||
507 | mov (arg2), %al | ||
508 | mov %al, (%r11) | ||
509 | |||
510 | mov 1(arg2), %al | ||
511 | mov %al, 1(%r11) | ||
512 | |||
513 | mov 2(arg2), %al | ||
514 | mov %al, 2(%r11) | ||
515 | |||
516 | movdqa (%rsp), %xmm7 | ||
517 | pshufb %xmm11, %xmm7 | ||
518 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
519 | |||
520 | psrldq $5, %xmm7 | ||
521 | |||
522 | jmp _barrett | ||
523 | _only_less_than_3: | ||
524 | cmp $2, arg3 | ||
525 | jl _only_less_than_2 | ||
526 | |||
527 | # load 2 Bytes | ||
528 | mov (arg2), %al | ||
529 | mov %al, (%r11) | ||
530 | |||
531 | mov 1(arg2), %al | ||
532 | mov %al, 1(%r11) | ||
533 | |||
534 | movdqa (%rsp), %xmm7 | ||
535 | pshufb %xmm11, %xmm7 | ||
536 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
537 | |||
538 | psrldq $6, %xmm7 | ||
539 | |||
540 | jmp _barrett | ||
541 | _only_less_than_2: | ||
542 | |||
543 | # load 1 Byte | ||
544 | mov (arg2), %al | ||
545 | mov %al, (%r11) | ||
546 | |||
547 | movdqa (%rsp), %xmm7 | ||
548 | pshufb %xmm11, %xmm7 | ||
549 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
550 | |||
551 | psrldq $7, %xmm7 | ||
552 | |||
553 | jmp _barrett | ||
554 | |||
555 | ENDPROC(crc_t10dif_pcl) | ||
556 | |||
557 | .data | ||
558 | |||
559 | # precomputed constants | ||
560 | # these constants are precomputed from the poly: | ||
561 | # 0x8bb70000 (0x8bb7 scaled to 32 bits) | ||
562 | .align 16 | ||
563 | # Q = 0x18BB70000 | ||
564 | # rk1 = 2^(32*3) mod Q << 32 | ||
565 | # rk2 = 2^(32*5) mod Q << 32 | ||
566 | # rk3 = 2^(32*15) mod Q << 32 | ||
567 | # rk4 = 2^(32*17) mod Q << 32 | ||
568 | # rk5 = 2^(32*3) mod Q << 32 | ||
569 | # rk6 = 2^(32*2) mod Q << 32 | ||
570 | # rk7 = floor(2^64/Q) | ||
571 | # rk8 = Q | ||
572 | rk1: | ||
573 | .quad 0x2d56000000000000 | ||
574 | rk2: | ||
575 | .quad 0x06df000000000000 | ||
576 | rk3: | ||
577 | .quad 0x9d9d000000000000 | ||
578 | rk4: | ||
579 | .quad 0x7cf5000000000000 | ||
580 | rk5: | ||
581 | .quad 0x2d56000000000000 | ||
582 | rk6: | ||
583 | .quad 0x1368000000000000 | ||
584 | rk7: | ||
585 | .quad 0x00000001f65a57f8 | ||
586 | rk8: | ||
587 | .quad 0x000000018bb70000 | ||
588 | |||
589 | rk9: | ||
590 | .quad 0xceae000000000000 | ||
591 | rk10: | ||
592 | .quad 0xbfd6000000000000 | ||
593 | rk11: | ||
594 | .quad 0x1e16000000000000 | ||
595 | rk12: | ||
596 | .quad 0x713c000000000000 | ||
597 | rk13: | ||
598 | .quad 0xf7f9000000000000 | ||
599 | rk14: | ||
600 | .quad 0x80a6000000000000 | ||
601 | rk15: | ||
602 | .quad 0x044c000000000000 | ||
603 | rk16: | ||
604 | .quad 0xe658000000000000 | ||
605 | rk17: | ||
606 | .quad 0xad18000000000000 | ||
607 | rk18: | ||
608 | .quad 0xa497000000000000 | ||
609 | rk19: | ||
610 | .quad 0x6ee3000000000000 | ||
611 | rk20: | ||
612 | .quad 0xe7b5000000000000 | ||
613 | |||
614 | |||
615 | |||
616 | mask1: | ||
617 | .octa 0x80808080808080808080808080808080 | ||
618 | mask2: | ||
619 | .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF | ||
620 | |||
621 | SHUF_MASK: | ||
622 | .octa 0x000102030405060708090A0B0C0D0E0F | ||
623 | |||
624 | pshufb_shf_table: | ||
625 | # use these values for shift constants for the pshufb instruction | ||
626 | # different alignments result in values as shown: | ||
627 | # DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 | ||
628 | # DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 | ||
629 | # DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 | ||
630 | # DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 | ||
631 | # DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 | ||
632 | # DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 | ||
633 | # DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 | ||
634 | # DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 | ||
635 | # DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 | ||
636 | # DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 | ||
637 | # DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 | ||
638 | # DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 | ||
639 | # DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 | ||
640 | # DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 | ||
641 | # DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 | ||
642 | .octa 0x8f8e8d8c8b8a89888786858483828100 | ||
643 | .octa 0x000e0d0c0b0a09080706050403020100 | ||
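The routine above implements the interface described in its header comment: crc_t10dif_pcl(init_crc, buf, len) returns the updated 16-bit CRC, and it freely clobbers XMM registers, so a caller must bracket it with an FPU section. Purely as an illustration (not part of this diff), a minimal cross-check against the existing table-driven implementation could look like the C sketch below; the function name crct10dif_pcl_selftest is hypothetical, while crc_t10dif_generic(), kernel_fpu_begin() and kernel_fpu_end() are the same kernel interfaces the glue code in the next file relies on.

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/linkage.h>
#include <linux/crc-t10dif.h>	/* crc_t10dif_generic() */
#include <asm/i387.h>		/* kernel_fpu_begin()/kernel_fpu_end() */

asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, size_t len);

/* Hypothetical sketch: compare the PCLMULQDQ routine against the generic
 * table-driven CRC over one buffer; returns 0 if they agree. */
static int crct10dif_pcl_selftest(const unsigned char *buf, size_t len)
{
	__u16 want, got;

	want = crc_t10dif_generic(0, buf, len);

	kernel_fpu_begin();		/* the assembly uses XMM registers */
	got = crc_t10dif_pcl(0, buf, len);
	kernel_fpu_end();

	return got == want ? 0 : -EINVAL;
}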
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
new file mode 100644
index 000000000000..7845d7fd54c0
--- /dev/null
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -0,0 +1,151 @@
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions | ||
5 | * | ||
6 | * Copyright (C) 2013 Intel Corporation | ||
7 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License as published by the Free | ||
11 | * Software Foundation; either version 2 of the License, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
15 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
16 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
17 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
18 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
19 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
21 | * SOFTWARE. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/crc-t10dif.h> | ||
28 | #include <crypto/internal/hash.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/string.h> | ||
31 | #include <linux/kernel.h> | ||
32 | #include <asm/i387.h> | ||
33 | #include <asm/cpufeature.h> | ||
34 | #include <asm/cpu_device_id.h> | ||
35 | |||
36 | asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, | ||
37 | size_t len); | ||
38 | |||
39 | struct chksum_desc_ctx { | ||
40 | __u16 crc; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Steps through buffer one byte at at time, calculates reflected | ||
45 | * crc using table. | ||
46 | */ | ||
47 | |||
48 | static int chksum_init(struct shash_desc *desc) | ||
49 | { | ||
50 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
51 | |||
52 | ctx->crc = 0; | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static int chksum_update(struct shash_desc *desc, const u8 *data, | ||
58 | unsigned int length) | ||
59 | { | ||
60 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
61 | |||
62 | if (irq_fpu_usable()) { | ||
63 | kernel_fpu_begin(); | ||
64 | ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); | ||
65 | kernel_fpu_end(); | ||
66 | } else | ||
67 | ctx->crc = crc_t10dif_generic(ctx->crc, data, length); | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int chksum_final(struct shash_desc *desc, u8 *out) | ||
72 | { | ||
73 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
74 | |||
75 | *(__u16 *)out = ctx->crc; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, | ||
80 | u8 *out) | ||
81 | { | ||
82 | if (irq_fpu_usable()) { | ||
83 | kernel_fpu_begin(); | ||
84 | *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); | ||
85 | kernel_fpu_end(); | ||
86 | } else | ||
87 | *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static int chksum_finup(struct shash_desc *desc, const u8 *data, | ||
92 | unsigned int len, u8 *out) | ||
93 | { | ||
94 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
95 | |||
96 | return __chksum_finup(&ctx->crc, data, len, out); | ||
97 | } | ||
98 | |||
99 | static int chksum_digest(struct shash_desc *desc, const u8 *data, | ||
100 | unsigned int length, u8 *out) | ||
101 | { | ||
102 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
103 | |||
104 | return __chksum_finup(&ctx->crc, data, length, out); | ||
105 | } | ||
106 | |||
107 | static struct shash_alg alg = { | ||
108 | .digestsize = CRC_T10DIF_DIGEST_SIZE, | ||
109 | .init = chksum_init, | ||
110 | .update = chksum_update, | ||
111 | .final = chksum_final, | ||
112 | .finup = chksum_finup, | ||
113 | .digest = chksum_digest, | ||
114 | .descsize = sizeof(struct chksum_desc_ctx), | ||
115 | .base = { | ||
116 | .cra_name = "crct10dif", | ||
117 | .cra_driver_name = "crct10dif-pclmul", | ||
118 | .cra_priority = 200, | ||
119 | .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, | ||
120 | .cra_module = THIS_MODULE, | ||
121 | } | ||
122 | }; | ||
123 | |||
124 | static const struct x86_cpu_id crct10dif_cpu_id[] = { | ||
125 | X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), | ||
126 | {} | ||
127 | }; | ||
128 | MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); | ||
129 | |||
130 | static int __init crct10dif_intel_mod_init(void) | ||
131 | { | ||
132 | if (!x86_match_cpu(crct10dif_cpu_id)) | ||
133 | return -ENODEV; | ||
134 | |||
135 | return crypto_register_shash(&alg); | ||
136 | } | ||
137 | |||
138 | static void __exit crct10dif_intel_mod_fini(void) | ||
139 | { | ||
140 | crypto_unregister_shash(&alg); | ||
141 | } | ||
142 | |||
143 | module_init(crct10dif_intel_mod_init); | ||
144 | module_exit(crct10dif_intel_mod_fini); | ||
145 | |||
146 | MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); | ||
147 | MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); | ||
148 | MODULE_LICENSE("GPL"); | ||
149 | |||
150 | MODULE_ALIAS("crct10dif"); | ||
151 | MODULE_ALIAS("crct10dif-pclmul"); | ||
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h
index c09241659971..b4b38bacb404 100644
--- a/arch/x86/include/asm/dma-contiguous.h
+++ b/arch/x86/include/asm/dma-contiguous.h
@@ -4,7 +4,6 @@
4 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
5 | 5 | ||
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <asm-generic/dma-contiguous.h> | ||
8 | 7 | ||
9 | static inline void | 8 | static inline void |
10 | dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } | 9 | dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } |
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 3a16c1483b45..64507f35800c 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -3,18 +3,23 @@
3 | 3 | ||
4 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
5 | 5 | ||
6 | #include <linux/stringify.h> | ||
6 | #include <linux/types.h> | 7 | #include <linux/types.h> |
7 | #include <asm/nops.h> | 8 | #include <asm/nops.h> |
8 | #include <asm/asm.h> | 9 | #include <asm/asm.h> |
9 | 10 | ||
10 | #define JUMP_LABEL_NOP_SIZE 5 | 11 | #define JUMP_LABEL_NOP_SIZE 5 |
11 | 12 | ||
12 | #define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" | 13 | #ifdef CONFIG_X86_64 |
14 | # define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC | ||
15 | #else | ||
16 | # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC | ||
17 | #endif | ||
13 | 18 | ||
14 | static __always_inline bool arch_static_branch(struct static_key *key) | 19 | static __always_inline bool arch_static_branch(struct static_key *key) |
15 | { | 20 | { |
16 | asm goto("1:" | 21 | asm goto("1:" |
17 | STATIC_KEY_INITIAL_NOP | 22 | ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" |
18 | ".pushsection __jump_table, \"aw\" \n\t" | 23 | ".pushsection __jump_table, \"aw\" \n\t" |
19 | _ASM_ALIGN "\n\t" | 24 | _ASM_ALIGN "\n\t" |
20 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" | 25 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f87f7fcefa0a..c76ff74a98f2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_mmu {
286 | u64 *pae_root; | 286 | u64 *pae_root; |
287 | u64 *lm_root; | 287 | u64 *lm_root; |
288 | u64 rsvd_bits_mask[2][4]; | 288 | u64 rsvd_bits_mask[2][4]; |
289 | u64 bad_mt_xwr; | ||
289 | 290 | ||
290 | /* | 291 | /* |
291 | * Bitmap: bit set = last pte in walk | 292 | * Bitmap: bit set = last pte in walk |
@@ -323,6 +324,7 @@ struct kvm_pmu {
323 | u64 global_ovf_ctrl; | 324 | u64 global_ovf_ctrl; |
324 | u64 counter_bitmask[2]; | 325 | u64 counter_bitmask[2]; |
325 | u64 global_ctrl_mask; | 326 | u64 global_ctrl_mask; |
327 | u64 reserved_bits; | ||
326 | u8 version; | 328 | u8 version; |
327 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; | 329 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; |
328 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; | 330 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; |
@@ -511,6 +513,14 @@ struct kvm_vcpu_arch {
511 | * instruction. | 513 | * instruction. |
512 | */ | 514 | */ |
513 | bool write_fault_to_shadow_pgtable; | 515 | bool write_fault_to_shadow_pgtable; |
516 | |||
517 | /* set at EPT violation at this point */ | ||
518 | unsigned long exit_qualification; | ||
519 | |||
520 | /* pv related host specific info */ | ||
521 | struct { | ||
522 | bool pv_unhalted; | ||
523 | } pv; | ||
514 | }; | 524 | }; |
515 | 525 | ||
516 | struct kvm_lpage_info { | 526 | struct kvm_lpage_info { |
@@ -802,8 +812,8 @@ extern u32 kvm_min_guest_tsc_khz;
802 | extern u32 kvm_max_guest_tsc_khz; | 812 | extern u32 kvm_max_guest_tsc_khz; |
803 | 813 | ||
804 | enum emulation_result { | 814 | enum emulation_result { |
805 | EMULATE_DONE, /* no further processing */ | 815 | EMULATE_DONE, /* no further processing */ |
806 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ | 816 | EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ |
807 | EMULATE_FAIL, /* can't emulate this instruction */ | 817 | EMULATE_FAIL, /* can't emulate this instruction */ |
808 | }; | 818 | }; |
809 | 819 | ||
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d9e9e6c7ed32..7d7443283a9d 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -100,29 +100,6 @@ static inline void early_quirks(void) { }
100 | extern void pci_iommu_alloc(void); | 100 | extern void pci_iommu_alloc(void); |
101 | 101 | ||
102 | #ifdef CONFIG_PCI_MSI | 102 | #ifdef CONFIG_PCI_MSI |
103 | /* MSI arch specific hooks */ | ||
104 | static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | ||
105 | { | ||
106 | return x86_msi.setup_msi_irqs(dev, nvec, type); | ||
107 | } | ||
108 | |||
109 | static inline void x86_teardown_msi_irqs(struct pci_dev *dev) | ||
110 | { | ||
111 | x86_msi.teardown_msi_irqs(dev); | ||
112 | } | ||
113 | |||
114 | static inline void x86_teardown_msi_irq(unsigned int irq) | ||
115 | { | ||
116 | x86_msi.teardown_msi_irq(irq); | ||
117 | } | ||
118 | static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) | ||
119 | { | ||
120 | x86_msi.restore_msi_irqs(dev, irq); | ||
121 | } | ||
122 | #define arch_setup_msi_irqs x86_setup_msi_irqs | ||
123 | #define arch_teardown_msi_irqs x86_teardown_msi_irqs | ||
124 | #define arch_teardown_msi_irq x86_teardown_msi_irq | ||
125 | #define arch_restore_msi_irqs x86_restore_msi_irqs | ||
126 | /* implemented in arch/x86/kernel/apic/io_apic. */ | 103 | /* implemented in arch/x86/kernel/apic/io_apic. */ |
127 | struct msi_desc; | 104 | struct msi_desc; |
128 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); | 105 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); |
@@ -130,16 +107,9 @@ void native_teardown_msi_irq(unsigned int irq);
130 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); | 107 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); |
131 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | 108 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
132 | unsigned int irq_base, unsigned int irq_offset); | 109 | unsigned int irq_base, unsigned int irq_offset); |
133 | /* default to the implementation in drivers/lib/msi.c */ | ||
134 | #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS | ||
135 | #define HAVE_DEFAULT_MSI_RESTORE_IRQS | ||
136 | void default_teardown_msi_irqs(struct pci_dev *dev); | ||
137 | void default_restore_msi_irqs(struct pci_dev *dev, int irq); | ||
138 | #else | 110 | #else |
139 | #define native_setup_msi_irqs NULL | 111 | #define native_setup_msi_irqs NULL |
140 | #define native_teardown_msi_irq NULL | 112 | #define native_teardown_msi_irq NULL |
141 | #define default_teardown_msi_irqs NULL | ||
142 | #define default_restore_msi_irqs NULL | ||
143 | #endif | 113 | #endif |
144 | 114 | ||
145 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) | 115 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 8d16befdec88..3d1999458709 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -315,21 +315,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
315 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); | 315 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); |
316 | } | 316 | } |
317 | 317 | ||
318 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
319 | { | ||
320 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
321 | } | ||
322 | |||
323 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
324 | { | ||
325 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | ||
326 | } | ||
327 | |||
328 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
329 | { | ||
330 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
331 | } | ||
332 | |||
333 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) | 318 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) |
334 | { | 319 | { |
335 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); | 320 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); |
@@ -446,6 +431,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
446 | 431 | ||
447 | #ifndef __ASSEMBLY__ | 432 | #ifndef __ASSEMBLY__ |
448 | #include <linux/mm_types.h> | 433 | #include <linux/mm_types.h> |
434 | #include <linux/mmdebug.h> | ||
449 | #include <linux/log2.h> | 435 | #include <linux/log2.h> |
450 | 436 | ||
451 | static inline int pte_none(pte_t pte) | 437 | static inline int pte_none(pte_t pte) |
@@ -864,6 +850,24 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
864 | { | 850 | { |
865 | } | 851 | } |
866 | 852 | ||
853 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
854 | { | ||
855 | VM_BUG_ON(pte_present(pte)); | ||
856 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
857 | } | ||
858 | |||
859 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
860 | { | ||
861 | VM_BUG_ON(pte_present(pte)); | ||
862 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | ||
863 | } | ||
864 | |||
865 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
866 | { | ||
867 | VM_BUG_ON(pte_present(pte)); | ||
868 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
869 | } | ||
870 | |||
867 | #include <asm-generic/pgtable.h> | 871 | #include <asm-generic/pgtable.h> |
868 | #endif /* __ASSEMBLY__ */ | 872 | #endif /* __ASSEMBLY__ */ |
869 | 873 | ||
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index f4843e031131..0ecac257fb26 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -75,6 +75,9 @@
75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved | 75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved |
76 | * into swap entry computation, but bit 6 is used for nonlinear | 76 | * into swap entry computation, but bit 6 is used for nonlinear |
77 | * file mapping, so we borrow bit 7 for soft dirty tracking. | 77 | * file mapping, so we borrow bit 7 for soft dirty tracking. |
78 | * | ||
79 | * Please note that this bit must be treated as swap dirty page | ||
80 | * mark if and only if the PTE has present bit clear! | ||
78 | */ | 81 | */ |
79 | #ifdef CONFIG_MEM_SOFT_DIRTY | 82 | #ifdef CONFIG_MEM_SOFT_DIRTY |
80 | #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE | 83 | #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 109a9dd5d454..be8269b00e2a 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -93,7 +93,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
93 | 93 | ||
94 | struct pvclock_vsyscall_time_info { | 94 | struct pvclock_vsyscall_time_info { |
95 | struct pvclock_vcpu_time_info pvti; | 95 | struct pvclock_vcpu_time_info pvti; |
96 | u32 migrate_count; | ||
97 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 96 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
98 | 97 | ||
99 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) | 98 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) |
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index cf512003e663..e6d90babc245 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,6 +62,7 @@ static inline void __flush_tlb_all(void)
62 | 62 | ||
63 | static inline void __flush_tlb_one(unsigned long addr) | 63 | static inline void __flush_tlb_one(unsigned long addr) |
64 | { | 64 | { |
65 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | ||
65 | __flush_tlb_single(addr); | 66 | __flush_tlb_single(addr); |
66 | } | 67 | } |
67 | 68 | ||
@@ -84,14 +85,38 @@ static inline void __flush_tlb_one(unsigned long addr)
84 | 85 | ||
85 | #ifndef CONFIG_SMP | 86 | #ifndef CONFIG_SMP |
86 | 87 | ||
87 | #define flush_tlb() __flush_tlb() | 88 | /* "_up" is for UniProcessor. |
88 | #define flush_tlb_all() __flush_tlb_all() | 89 | * |
89 | #define local_flush_tlb() __flush_tlb() | 90 | * This is a helper for other header functions. *Not* intended to be called |
91 | * directly. All global TLB flushes need to either call this, or to bump the | ||
92 | * vm statistics themselves. | ||
93 | */ | ||
94 | static inline void __flush_tlb_up(void) | ||
95 | { | ||
96 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
97 | __flush_tlb(); | ||
98 | } | ||
99 | |||
100 | static inline void flush_tlb_all(void) | ||
101 | { | ||
102 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
103 | __flush_tlb_all(); | ||
104 | } | ||
105 | |||
106 | static inline void flush_tlb(void) | ||
107 | { | ||
108 | __flush_tlb_up(); | ||
109 | } | ||
110 | |||
111 | static inline void local_flush_tlb(void) | ||
112 | { | ||
113 | __flush_tlb_up(); | ||
114 | } | ||
90 | 115 | ||
91 | static inline void flush_tlb_mm(struct mm_struct *mm) | 116 | static inline void flush_tlb_mm(struct mm_struct *mm) |
92 | { | 117 | { |
93 | if (mm == current->active_mm) | 118 | if (mm == current->active_mm) |
94 | __flush_tlb(); | 119 | __flush_tlb_up(); |
95 | } | 120 | } |
96 | 121 | ||
97 | static inline void flush_tlb_page(struct vm_area_struct *vma, | 122 | static inline void flush_tlb_page(struct vm_area_struct *vma, |
@@ -105,14 +130,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, | |||
105 | unsigned long start, unsigned long end) | 130 | unsigned long start, unsigned long end) |
106 | { | 131 | { |
107 | if (vma->vm_mm == current->active_mm) | 132 | if (vma->vm_mm == current->active_mm) |
108 | __flush_tlb(); | 133 | __flush_tlb_up(); |
109 | } | 134 | } |
110 | 135 | ||
111 | static inline void flush_tlb_mm_range(struct mm_struct *mm, | 136 | static inline void flush_tlb_mm_range(struct mm_struct *mm, |
112 | unsigned long start, unsigned long end, unsigned long vmflag) | 137 | unsigned long start, unsigned long end, unsigned long vmflag) |
113 | { | 138 | { |
114 | if (mm == current->active_mm) | 139 | if (mm == current->active_mm) |
115 | __flush_tlb(); | 140 | __flush_tlb_up(); |
116 | } | 141 | } |
117 | 142 | ||
118 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, | 143 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, |
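The hunk above turns the UP flush macros into inline functions so every global flush is accounted in NR_TLB_LOCAL_FLUSH_ALL before the hardware flush runs. A stand-alone sketch of that wrapper-with-accounting pattern, with all names as placeholders rather than the kernel's:

#include <stdio.h>

static unsigned long nr_local_flush_all;      /* stands in for the VM event counter */

static void raw_flush(void)                   /* stands in for __flush_tlb() */
{
        /* the actual CR3 reload would happen here */
}

static void flush_with_accounting(void)       /* stands in for __flush_tlb_up() */
{
        nr_local_flush_all++;                 /* account first ... */
        raw_flush();                          /* ... then flush    */
}

int main(void)
{
        flush_with_accounting();
        flush_with_accounting();
        printf("local flush-all events: %lu\n", nr_local_flush_all);
        return 0;
}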
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f3e01a2cbaa1..966502d4682e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -387,6 +387,7 @@ enum vmcs_field { | |||
387 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 | 387 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 |
388 | #define VMX_EPT_EXTENT_CONTEXT 1 | 388 | #define VMX_EPT_EXTENT_CONTEXT 1 |
389 | #define VMX_EPT_EXTENT_GLOBAL 2 | 389 | #define VMX_EPT_EXTENT_GLOBAL 2 |
390 | #define VMX_EPT_EXTENT_SHIFT 24 | ||
390 | 391 | ||
391 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) | 392 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) |
392 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) | 393 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) |
@@ -394,6 +395,7 @@ enum vmcs_field { | |||
394 | #define VMX_EPTP_WB_BIT (1ull << 14) | 395 | #define VMX_EPTP_WB_BIT (1ull << 14) |
395 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 396 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
396 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | 397 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) |
398 | #define VMX_EPT_INVEPT_BIT (1ull << 20) | ||
397 | #define VMX_EPT_AD_BIT (1ull << 21) | 399 | #define VMX_EPT_AD_BIT (1ull << 21) |
398 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 400 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
399 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 401 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
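VMX_EPT_INVEPT_BIT (bit 20) sits alongside the existing single-context (bit 25) and global (bit 26) extent bits; as these defines are consumed by KVM they appear to describe the IA32_VMX_EPT_VPID_CAP capability word, though that MSR interpretation is an assumption here, as are the helper names. A hedged sketch of decoding a capability value read elsewhere:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_INVEPT_BIT          (1ull << 20)
#define VMX_EPT_EXTENT_CONTEXT_BIT  (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT   (1ull << 26)

/* Hypothetical helpers: decode a capability word obtained elsewhere. */
static bool has_invept(uint64_t cap)
{
        return cap & VMX_EPT_INVEPT_BIT;
}

static bool has_global_invept(uint64_t cap)
{
        return (cap & VMX_EPT_INVEPT_BIT) && (cap & VMX_EPT_EXTENT_GLOBAL_BIT);
}

int main(void)
{
        uint64_t cap = VMX_EPT_INVEPT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT;  /* made-up value */

        printf("invept=%d global=%d\n", has_invept(cap), has_global_invept(cap));
        return 0;
}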
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index ca842f2769ef..608a79d5a466 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h | |||
@@ -7,6 +7,7 @@ enum ipi_vector { | |||
7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, | 7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, |
8 | XEN_SPIN_UNLOCK_VECTOR, | 8 | XEN_SPIN_UNLOCK_VECTOR, |
9 | XEN_IRQ_WORK_VECTOR, | 9 | XEN_IRQ_WORK_VECTOR, |
10 | XEN_NMI_VECTOR, | ||
10 | 11 | ||
11 | XEN_NR_IPIS, | 12 | XEN_NR_IPIS, |
12 | }; | 13 | }; |
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 7ea79c5fa1f2..492b29802f57 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h | |||
@@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = { | |||
167 | 167 | ||
168 | #define AVX_XOR_SPEED \ | 168 | #define AVX_XOR_SPEED \ |
169 | do { \ | 169 | do { \ |
170 | if (cpu_has_avx) \ | 170 | if (cpu_has_avx && cpu_has_osxsave) \ |
171 | xor_speed(&xor_block_avx); \ | 171 | xor_speed(&xor_block_avx); \ |
172 | } while (0) | 172 | } while (0) |
173 | 173 | ||
174 | #define AVX_SELECT(FASTEST) \ | 174 | #define AVX_SELECT(FASTEST) \ |
175 | (cpu_has_avx ? &xor_block_avx : FASTEST) | 175 | (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST) |
176 | 176 | ||
177 | #else | 177 | #else |
178 | 178 | ||
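Requiring cpu_has_osxsave alongside cpu_has_avx reflects that AVX registers are only usable once the OS has enabled XSAVE and the YMM state bits in XCR0. A user-space sketch of the equivalent check, assuming the usual CPUID.1:ECX bit positions (27 for OSXSAVE, 28 for AVX) and the XCR0 XMM+YMM mask 0x6; avx_usable() is a hypothetical helper:

#include <cpuid.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CPUID1_ECX_OSXSAVE (1u << 27)
#define CPUID1_ECX_AVX     (1u << 28)

static bool avx_usable(void)
{
        unsigned int eax, ebx, ecx, edx;
        uint32_t xcr0_lo, xcr0_hi;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return false;
        if (!(ecx & CPUID1_ECX_AVX) || !(ecx & CPUID1_ECX_OSXSAVE))
                return false;

        /* OSXSAVE set means XGETBV is available; XCR0 bits 1 and 2 cover XMM+YMM. */
        __asm__ volatile("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
        (void)xcr0_hi;
        return (xcr0_lo & 0x6) == 0x6;
}

int main(void)
{
        printf("AVX usable: %s\n", avx_usable() ? "yes" : "no");
        return 0;
}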
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d651082c7cf7..0e79420376eb 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -65,6 +65,7 @@ | |||
65 | #define EXIT_REASON_EOI_INDUCED 45 | 65 | #define EXIT_REASON_EOI_INDUCED 45 |
66 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
67 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
68 | #define EXIT_REASON_INVEPT 50 | ||
68 | #define EXIT_REASON_PREEMPTION_TIMER 52 | 69 | #define EXIT_REASON_PREEMPTION_TIMER 52 |
69 | #define EXIT_REASON_WBINVD 54 | 70 | #define EXIT_REASON_WBINVD 54 |
70 | #define EXIT_REASON_XSETBV 55 | 71 | #define EXIT_REASON_XSETBV 55 |
@@ -106,12 +107,13 @@ | |||
106 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | 107 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ |
107 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | 108 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ |
108 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | 109 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ |
110 | { EXIT_REASON_INVEPT, "INVEPT" }, \ | ||
111 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ | ||
109 | { EXIT_REASON_WBINVD, "WBINVD" }, \ | 112 | { EXIT_REASON_WBINVD, "WBINVD" }, \ |
110 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | 113 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ |
111 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 114 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
112 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | 115 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ |
113 | { EXIT_REASON_INVD, "INVD" }, \ | 116 | { EXIT_REASON_INVD, "INVD" }, \ |
114 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 117 | { EXIT_REASON_INVPCID, "INVPCID" } |
115 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } | ||
116 | 118 | ||
117 | #endif /* _UAPIVMX_H */ | 119 | #endif /* _UAPIVMX_H */ |
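The string table above maps raw exit-reason numbers to names for tracing. A small stand-alone sketch of the same idea as a direct lookup array — exit_reason_name() is a hypothetical decoder, only the numeric values come from this header:

#include <stdio.h>

#define EXIT_REASON_EPT_VIOLATION    48
#define EXIT_REASON_EPT_MISCONFIG    49
#define EXIT_REASON_INVEPT           50
#define EXIT_REASON_PREEMPTION_TIMER 52

static const char *const vmx_exit_name[] = {
        [EXIT_REASON_EPT_VIOLATION]    = "EPT_VIOLATION",
        [EXIT_REASON_EPT_MISCONFIG]    = "EPT_MISCONFIG",
        [EXIT_REASON_INVEPT]           = "INVEPT",
        [EXIT_REASON_PREEMPTION_TIMER] = "PREEMPTION_TIMER",
};

/* Hypothetical decoder for tracing tools: unknown or unnamed reasons fall through. */
static const char *exit_reason_name(unsigned int reason)
{
        if (reason < sizeof(vmx_exit_name) / sizeof(vmx_exit_name[0]) &&
            vmx_exit_name[reason])
                return vmx_exit_name[reason];
        return "UNKNOWN";
}

int main(void)
{
        printf("50 -> %s, 51 -> %s\n", exit_reason_name(50), exit_reason_name(51));
        return 0;
}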
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index d4cdfa67509e..ce2d0a2c3e4f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -683,6 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
683 | } | 683 | } |
684 | 684 | ||
685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | 685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ |
686 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
686 | __flush_tlb(); | 687 | __flush_tlb(); |
687 | 688 | ||
688 | /* Save MTRR state */ | 689 | /* Save MTRR state */ |
@@ -696,6 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
696 | static void post_set(void) __releases(set_atomicity_lock) | 697 | static void post_set(void) __releases(set_atomicity_lock) |
697 | { | 698 | { |
698 | /* Flush TLBs (no need to flush caches - they are disabled) */ | 699 | /* Flush TLBs (no need to flush caches - they are disabled) */ |
700 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
699 | __flush_tlb(); | 701 | __flush_tlb(); |
700 | 702 | ||
701 | /* Intel (P6) standard MTRRs */ | 703 | /* Intel (P6) standard MTRRs */ |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 69eb2fa25494..376dc7873447 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) | |||
52 | } | 52 | } |
53 | 53 | ||
54 | #ifdef CONFIG_BLK_DEV_INITRD | 54 | #ifdef CONFIG_BLK_DEV_INITRD |
55 | void __init early_init_dt_setup_initrd_arch(unsigned long start, | 55 | void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) |
56 | unsigned long end) | ||
57 | { | 56 | { |
58 | initrd_start = (unsigned long)__va(start); | 57 | initrd_start = (unsigned long)__va(start); |
59 | initrd_end = (unsigned long)__va(end); | 58 | initrd_end = (unsigned long)__va(end); |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 63bdb29b2549..b3cd3ebae077 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
13 | #include <linux/acpi.h> | 13 | #include <linux/acpi.h> |
14 | #include <linux/pci_ids.h> | 14 | #include <linux/pci_ids.h> |
15 | #include <drm/i915_drm.h> | ||
15 | #include <asm/pci-direct.h> | 16 | #include <asm/pci-direct.h> |
16 | #include <asm/dma.h> | 17 | #include <asm/dma.h> |
17 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
@@ -216,6 +217,157 @@ static void __init intel_remapping_check(int num, int slot, int func) | |||
216 | 217 | ||
217 | } | 218 | } |
218 | 219 | ||
220 | /* | ||
221 | * Systems with Intel graphics controllers set aside memory exclusively | ||
222 | * for gfx driver use. This memory is not marked in the E820 as reserved | ||
223 | * or as RAM, and so is subject to overlap from E820 manipulation later | ||
224 | * in the boot process. On some systems, MMIO space is allocated on top, | ||
225 | * despite the efforts of the "RAM buffer" approach, which simply rounds | ||
226 | * memory boundaries up to 64M to try to catch space that may decode | ||
227 | * as RAM and so is not suitable for MMIO. | ||
228 | * | ||
229 | * And yes, so far on current devices the base addr is always under 4G. | ||
230 | */ | ||
231 | static u32 __init intel_stolen_base(int num, int slot, int func) | ||
232 | { | ||
233 | u32 base; | ||
234 | |||
235 | /* | ||
236 | * For the PCI IDs in this quirk, the stolen base is always | ||
237 | * in 0x5c, aka the BDSM register (yes that's really what | ||
238 | * it's called). | ||
239 | */ | ||
240 | base = read_pci_config(num, slot, func, 0x5c); | ||
241 | base &= ~((1<<20) - 1); | ||
242 | |||
243 | return base; | ||
244 | } | ||
245 | |||
246 | #define KB(x) ((x) * 1024) | ||
247 | #define MB(x) (KB (KB (x))) | ||
248 | #define GB(x) (MB (KB (x))) | ||
249 | |||
250 | static size_t __init gen3_stolen_size(int num, int slot, int func) | ||
251 | { | ||
252 | size_t stolen_size; | ||
253 | u16 gmch_ctrl; | ||
254 | |||
255 | gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL); | ||
256 | |||
257 | switch (gmch_ctrl & I855_GMCH_GMS_MASK) { | ||
258 | case I855_GMCH_GMS_STOLEN_1M: | ||
259 | stolen_size = MB(1); | ||
260 | break; | ||
261 | case I855_GMCH_GMS_STOLEN_4M: | ||
262 | stolen_size = MB(4); | ||
263 | break; | ||
264 | case I855_GMCH_GMS_STOLEN_8M: | ||
265 | stolen_size = MB(8); | ||
266 | break; | ||
267 | case I855_GMCH_GMS_STOLEN_16M: | ||
268 | stolen_size = MB(16); | ||
269 | break; | ||
270 | case I855_GMCH_GMS_STOLEN_32M: | ||
271 | stolen_size = MB(32); | ||
272 | break; | ||
273 | case I915_GMCH_GMS_STOLEN_48M: | ||
274 | stolen_size = MB(48); | ||
275 | break; | ||
276 | case I915_GMCH_GMS_STOLEN_64M: | ||
277 | stolen_size = MB(64); | ||
278 | break; | ||
279 | case G33_GMCH_GMS_STOLEN_128M: | ||
280 | stolen_size = MB(128); | ||
281 | break; | ||
282 | case G33_GMCH_GMS_STOLEN_256M: | ||
283 | stolen_size = MB(256); | ||
284 | break; | ||
285 | case INTEL_GMCH_GMS_STOLEN_96M: | ||
286 | stolen_size = MB(96); | ||
287 | break; | ||
288 | case INTEL_GMCH_GMS_STOLEN_160M: | ||
289 | stolen_size = MB(160); | ||
290 | break; | ||
291 | case INTEL_GMCH_GMS_STOLEN_224M: | ||
292 | stolen_size = MB(224); | ||
293 | break; | ||
294 | case INTEL_GMCH_GMS_STOLEN_352M: | ||
295 | stolen_size = MB(352); | ||
296 | break; | ||
297 | default: | ||
298 | stolen_size = 0; | ||
299 | break; | ||
300 | } | ||
301 | |||
302 | return stolen_size; | ||
303 | } | ||
304 | |||
305 | static size_t __init gen6_stolen_size(int num, int slot, int func) | ||
306 | { | ||
307 | u16 gmch_ctrl; | ||
308 | |||
309 | gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); | ||
310 | gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; | ||
311 | gmch_ctrl &= SNB_GMCH_GMS_MASK; | ||
312 | |||
313 | return gmch_ctrl << 25; /* 32 MB units */ | ||
314 | } | ||
315 | |||
316 | typedef size_t (*stolen_size_fn)(int num, int slot, int func); | ||
317 | |||
318 | static struct pci_device_id intel_stolen_ids[] __initdata = { | ||
319 | INTEL_I915G_IDS(gen3_stolen_size), | ||
320 | INTEL_I915GM_IDS(gen3_stolen_size), | ||
321 | INTEL_I945G_IDS(gen3_stolen_size), | ||
322 | INTEL_I945GM_IDS(gen3_stolen_size), | ||
323 | INTEL_VLV_M_IDS(gen3_stolen_size), | ||
324 | INTEL_VLV_D_IDS(gen3_stolen_size), | ||
325 | INTEL_PINEVIEW_IDS(gen3_stolen_size), | ||
326 | INTEL_I965G_IDS(gen3_stolen_size), | ||
327 | INTEL_G33_IDS(gen3_stolen_size), | ||
328 | INTEL_I965GM_IDS(gen3_stolen_size), | ||
329 | INTEL_GM45_IDS(gen3_stolen_size), | ||
330 | INTEL_G45_IDS(gen3_stolen_size), | ||
331 | INTEL_IRONLAKE_D_IDS(gen3_stolen_size), | ||
332 | INTEL_IRONLAKE_M_IDS(gen3_stolen_size), | ||
333 | INTEL_SNB_D_IDS(gen6_stolen_size), | ||
334 | INTEL_SNB_M_IDS(gen6_stolen_size), | ||
335 | INTEL_IVB_M_IDS(gen6_stolen_size), | ||
336 | INTEL_IVB_D_IDS(gen6_stolen_size), | ||
337 | INTEL_HSW_D_IDS(gen6_stolen_size), | ||
338 | INTEL_HSW_M_IDS(gen6_stolen_size), | ||
339 | }; | ||
340 | |||
341 | static void __init intel_graphics_stolen(int num, int slot, int func) | ||
342 | { | ||
343 | size_t size; | ||
344 | int i; | ||
345 | u32 start; | ||
346 | u16 device, subvendor, subdevice; | ||
347 | |||
348 | device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); | ||
349 | subvendor = read_pci_config_16(num, slot, func, | ||
350 | PCI_SUBSYSTEM_VENDOR_ID); | ||
351 | subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID); | ||
352 | |||
353 | for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { | ||
354 | if (intel_stolen_ids[i].device == device) { | ||
355 | stolen_size_fn stolen_size = | ||
356 | (stolen_size_fn)intel_stolen_ids[i].driver_data; | ||
357 | size = stolen_size(num, slot, func); | ||
358 | start = intel_stolen_base(num, slot, func); | ||
359 | if (size && start) { | ||
360 | /* Mark this space as reserved */ | ||
361 | e820_add_region(start, size, E820_RESERVED); | ||
362 | sanitize_e820_map(e820.map, | ||
363 | ARRAY_SIZE(e820.map), | ||
364 | &e820.nr_map); | ||
365 | } | ||
366 | return; | ||
367 | } | ||
368 | } | ||
369 | } | ||
370 | |||
219 | #define QFLAG_APPLY_ONCE 0x1 | 371 | #define QFLAG_APPLY_ONCE 0x1 |
220 | #define QFLAG_APPLIED 0x2 | 372 | #define QFLAG_APPLIED 0x2 |
221 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 373 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
@@ -251,6 +403,8 @@ static struct chipset early_qrk[] __initdata = { | |||
251 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, | 403 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, |
252 | { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, | 404 | { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, |
253 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, | 405 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, |
406 | { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, | ||
407 | QFLAG_APPLY_ONCE, intel_graphics_stolen }, | ||
254 | {} | 408 | {} |
255 | }; | 409 | }; |
256 | 410 | ||
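The quirk reserves graphics stolen memory in the E820 map before it can be handed out as MMIO. For gen6+ the size comes from the GMS field of the GMCH control word counted in 32 MB units, which is what the "<< 25" above encodes. A stand-alone sketch of that decode — GMS_SHIFT/GMS_MASK are illustrative stand-ins for SNB_GMCH_GMS_SHIFT/SNB_GMCH_GMS_MASK, whose exact values are not shown in this hunk:

#include <stdint.h>
#include <stdio.h>

#define GMS_SHIFT 3       /* assumed stand-in for SNB_GMCH_GMS_SHIFT */
#define GMS_MASK  0x1f    /* assumed stand-in for SNB_GMCH_GMS_MASK  */

static uint64_t gen6_stolen_bytes(uint16_t gmch_ctrl)
{
        uint64_t gms = (gmch_ctrl >> GMS_SHIFT) & GMS_MASK;

        return gms << 25;                 /* 32 MB units -> bytes */
}

int main(void)
{
        uint16_t gmch_ctrl = 2 << GMS_SHIFT;   /* GMS field of 2 => 64 MB stolen */

        printf("%llu MB stolen\n",
               (unsigned long long)(gen6_stolen_bytes(gmch_ctrl) >> 20));
        return 0;
}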
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2cfbc3a3a2dd..f0dcb0ceb6a2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1176,6 +1176,9 @@ ftrace_restore_flags: | |||
1176 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 1176 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1177 | 1177 | ||
1178 | ENTRY(mcount) | 1178 | ENTRY(mcount) |
1179 | cmpl $__PAGE_OFFSET, %esp | ||
1180 | jb ftrace_stub /* Paging not enabled yet? */ | ||
1181 | |||
1179 | cmpl $0, function_trace_stop | 1182 | cmpl $0, function_trace_stop |
1180 | jne ftrace_stub | 1183 | jne ftrace_stub |
1181 | 1184 | ||
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 460f5d9ceebb..ee11b7dfbfbb 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -24,18 +24,57 @@ union jump_code_union { | |||
24 | } __attribute__((packed)); | 24 | } __attribute__((packed)); |
25 | }; | 25 | }; |
26 | 26 | ||
27 | static void bug_at(unsigned char *ip, int line) | ||
28 | { | ||
29 | /* | ||
30 | * The location is not an op that we were expecting. | ||
31 | * Something went wrong. Crash the box, as something could be | ||
32 | * corrupting the kernel. | ||
33 | */ | ||
34 | pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n", | ||
35 | ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line); | ||
36 | BUG(); | ||
37 | } | ||
38 | |||
27 | static void __jump_label_transform(struct jump_entry *entry, | 39 | static void __jump_label_transform(struct jump_entry *entry, |
28 | enum jump_label_type type, | 40 | enum jump_label_type type, |
29 | void *(*poker)(void *, const void *, size_t)) | 41 | void *(*poker)(void *, const void *, size_t), |
42 | int init) | ||
30 | { | 43 | { |
31 | union jump_code_union code; | 44 | union jump_code_union code; |
45 | const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; | ||
32 | 46 | ||
33 | if (type == JUMP_LABEL_ENABLE) { | 47 | if (type == JUMP_LABEL_ENABLE) { |
48 | /* | ||
49 | * We are enabling this jump label. If it is not a nop | ||
50 | * then something must have gone wrong. | ||
51 | */ | ||
52 | if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0)) | ||
53 | bug_at((void *)entry->code, __LINE__); | ||
54 | |||
34 | code.jump = 0xe9; | 55 | code.jump = 0xe9; |
35 | code.offset = entry->target - | 56 | code.offset = entry->target - |
36 | (entry->code + JUMP_LABEL_NOP_SIZE); | 57 | (entry->code + JUMP_LABEL_NOP_SIZE); |
37 | } else | 58 | } else { |
59 | /* | ||
60 | * We are disabling this jump label. If it is not what | ||
61 | * we think it is, then something must have gone wrong. | ||
62 | * If this is the first initialization call, then we | ||
63 | * are converting the default nop to the ideal nop. | ||
64 | */ | ||
65 | if (init) { | ||
66 | const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; | ||
67 | if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0)) | ||
68 | bug_at((void *)entry->code, __LINE__); | ||
69 | } else { | ||
70 | code.jump = 0xe9; | ||
71 | code.offset = entry->target - | ||
72 | (entry->code + JUMP_LABEL_NOP_SIZE); | ||
73 | if (unlikely(memcmp((void *)entry->code, &code, 5) != 0)) | ||
74 | bug_at((void *)entry->code, __LINE__); | ||
75 | } | ||
38 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); | 76 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); |
77 | } | ||
39 | 78 | ||
40 | /* | 79 | /* |
41 | * Make text_poke_bp() a default fallback poker. | 80 | * Make text_poke_bp() a default fallback poker. |
@@ -57,15 +96,38 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
57 | { | 96 | { |
58 | get_online_cpus(); | 97 | get_online_cpus(); |
59 | mutex_lock(&text_mutex); | 98 | mutex_lock(&text_mutex); |
60 | __jump_label_transform(entry, type, NULL); | 99 | __jump_label_transform(entry, type, NULL, 0); |
61 | mutex_unlock(&text_mutex); | 100 | mutex_unlock(&text_mutex); |
62 | put_online_cpus(); | 101 | put_online_cpus(); |
63 | } | 102 | } |
64 | 103 | ||
104 | static enum { | ||
105 | JL_STATE_START, | ||
106 | JL_STATE_NO_UPDATE, | ||
107 | JL_STATE_UPDATE, | ||
108 | } jlstate __initdata_or_module = JL_STATE_START; | ||
109 | |||
65 | __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, | 110 | __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, |
66 | enum jump_label_type type) | 111 | enum jump_label_type type) |
67 | { | 112 | { |
68 | __jump_label_transform(entry, type, text_poke_early); | 113 | /* |
114 | * This function is called at boot up and when modules are | ||
115 | * first loaded. Check if the default nop, the one that is | ||
116 | * inserted at compile time, is the ideal nop. If it is, then | ||
117 | * we do not need to update the nop, and we can leave it as is. | ||
118 | * If it is not, then we need to update the nop to the ideal nop. | ||
119 | */ | ||
120 | if (jlstate == JL_STATE_START) { | ||
121 | const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; | ||
122 | const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; | ||
123 | |||
124 | if (memcmp(ideal_nop, default_nop, 5) != 0) | ||
125 | jlstate = JL_STATE_UPDATE; | ||
126 | else | ||
127 | jlstate = JL_STATE_NO_UPDATE; | ||
128 | } | ||
129 | if (jlstate == JL_STATE_UPDATE) | ||
130 | __jump_label_transform(entry, type, text_poke_early, 1); | ||
69 | } | 131 | } |
70 | 132 | ||
71 | #endif | 133 | #endif |
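The new checks refuse to patch a jump-label site unless its 5 bytes are exactly what is expected (the ideal nop, the compile-time default nop, or the jmp rel32 that would have been written), calling bug_at() otherwise. A stand-alone sketch of building the expected 5-byte jmp and comparing it against a site; the struct and helper are illustrative, only the 0xe9 opcode and the 5-byte size come from the hunk:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define JUMP_LABEL_NOP_SIZE 5

struct jump_code {
        uint8_t jump;            /* 0xe9: jmp rel32 opcode */
        int32_t offset;          /* signed displacement    */
} __attribute__((packed));

/* Does the 5-byte site already contain the jump we would write? */
static int site_matches_jump(const uint8_t *site, uint64_t code, uint64_t target)
{
        struct jump_code expect = {
                .jump   = 0xe9,
                .offset = (int32_t)(target - (code + JUMP_LABEL_NOP_SIZE)),
        };

        return memcmp(site, &expect, JUMP_LABEL_NOP_SIZE) == 0;
}

int main(void)
{
        uint8_t site[JUMP_LABEL_NOP_SIZE] = { 0xe9, 0x10, 0x00, 0x00, 0x00 };

        /* A jmp at 0x1000 targeting 0x1015 encodes a +0x10 displacement. */
        printf("%d\n", site_matches_jump(site, 0x1000, 0x1015));
        return 0;
}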
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 884aa4053313..1b10af835c31 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -62,11 +62,6 @@ void __init default_banner(void) | |||
62 | pv_info.name); | 62 | pv_info.name); |
63 | } | 63 | } |
64 | 64 | ||
65 | /* Simple instruction patching code. */ | ||
66 | #define DEF_NATIVE(ops, name, code) \ | ||
67 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
68 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
69 | |||
70 | /* Undefined instruction for dealing with missing ops pointers. */ | 65 | /* Undefined instruction for dealing with missing ops pointers. */ |
71 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; | 66 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; |
72 | 67 | ||
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2cb9470ea85b..a16bae3f83b3 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | |||
128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); |
129 | } | 129 | } |
130 | 130 | ||
131 | static struct pvclock_vsyscall_time_info *pvclock_vdso_info; | ||
132 | |||
133 | static struct pvclock_vsyscall_time_info * | ||
134 | pvclock_get_vsyscall_user_time_info(int cpu) | ||
135 | { | ||
136 | if (!pvclock_vdso_info) { | ||
137 | BUG(); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | return &pvclock_vdso_info[cpu]; | ||
142 | } | ||
143 | |||
144 | struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) | ||
145 | { | ||
146 | return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; | ||
147 | } | ||
148 | |||
149 | #ifdef CONFIG_X86_64 | 131 | #ifdef CONFIG_X86_64 |
150 | static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, | ||
151 | void *v) | ||
152 | { | ||
153 | struct task_migration_notifier *mn = v; | ||
154 | struct pvclock_vsyscall_time_info *pvti; | ||
155 | |||
156 | pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); | ||
157 | |||
158 | /* this is NULL when pvclock vsyscall is not initialized */ | ||
159 | if (unlikely(pvti == NULL)) | ||
160 | return NOTIFY_DONE; | ||
161 | |||
162 | pvti->migrate_count++; | ||
163 | |||
164 | return NOTIFY_DONE; | ||
165 | } | ||
166 | |||
167 | static struct notifier_block pvclock_migrate = { | ||
168 | .notifier_call = pvclock_task_migrate, | ||
169 | }; | ||
170 | |||
171 | /* | 132 | /* |
172 | * Initialize the generic pvclock vsyscall state. This will allocate | 133 | * Initialize the generic pvclock vsyscall state. This will allocate |
173 | * a/some page(s) for the per-vcpu pvclock information, set up a | 134 | * a/some page(s) for the per-vcpu pvclock information, set up a |
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, | |||
181 | 142 | ||
182 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); | 143 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); |
183 | 144 | ||
184 | pvclock_vdso_info = i; | ||
185 | |||
186 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | 145 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { |
187 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | 146 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, |
188 | __pa(i) + (idx*PAGE_SIZE), | 147 | __pa(i) + (idx*PAGE_SIZE), |
189 | PAGE_KERNEL_VVAR); | 148 | PAGE_KERNEL_VVAR); |
190 | } | 149 | } |
191 | 150 | ||
192 | |||
193 | register_task_migration_notifier(&pvclock_migrate); | ||
194 | |||
195 | return 0; | 151 | return 0; |
196 | } | 152 | } |
197 | #endif | 153 | #endif |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 5f24c71accaa..8ce0072cd700 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -107,6 +107,8 @@ struct x86_platform_ops x86_platform = { | |||
107 | }; | 107 | }; |
108 | 108 | ||
109 | EXPORT_SYMBOL_GPL(x86_platform); | 109 | EXPORT_SYMBOL_GPL(x86_platform); |
110 | |||
111 | #if defined(CONFIG_PCI_MSI) | ||
110 | struct x86_msi_ops x86_msi = { | 112 | struct x86_msi_ops x86_msi = { |
111 | .setup_msi_irqs = native_setup_msi_irqs, | 113 | .setup_msi_irqs = native_setup_msi_irqs, |
112 | .compose_msi_msg = native_compose_msi_msg, | 114 | .compose_msi_msg = native_compose_msi_msg, |
@@ -116,6 +118,28 @@ struct x86_msi_ops x86_msi = { | |||
116 | .setup_hpet_msi = default_setup_hpet_msi, | 118 | .setup_hpet_msi = default_setup_hpet_msi, |
117 | }; | 119 | }; |
118 | 120 | ||
121 | /* MSI arch specific hooks */ | ||
122 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | ||
123 | { | ||
124 | return x86_msi.setup_msi_irqs(dev, nvec, type); | ||
125 | } | ||
126 | |||
127 | void arch_teardown_msi_irqs(struct pci_dev *dev) | ||
128 | { | ||
129 | x86_msi.teardown_msi_irqs(dev); | ||
130 | } | ||
131 | |||
132 | void arch_teardown_msi_irq(unsigned int irq) | ||
133 | { | ||
134 | x86_msi.teardown_msi_irq(irq); | ||
135 | } | ||
136 | |||
137 | void arch_restore_msi_irqs(struct pci_dev *dev, int irq) | ||
138 | { | ||
139 | x86_msi.restore_msi_irqs(dev, irq); | ||
140 | } | ||
141 | #endif | ||
142 | |||
119 | struct x86_io_apic_ops x86_io_apic_ops = { | 143 | struct x86_io_apic_ops x86_io_apic_ops = { |
120 | .init = native_io_apic_init_mappings, | 144 | .init = native_io_apic_init_mappings, |
121 | .read = native_io_apic_read, | 145 | .read = native_io_apic_read, |
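Moving arch_setup_msi_irqs() and friends here makes them thin wrappers around the x86_msi ops table, so a platform such as Xen can override MSI handling by swapping the function pointers at boot. A stand-alone sketch of that ops-table indirection, using simplified hypothetical types rather than the real struct pci_dev plumbing:

#include <stdio.h>

struct msi_ops {
        int (*setup_msi_irqs)(int dev, int nvec);
};

static int native_setup_msi_irqs(int dev, int nvec)
{
        printf("native MSI setup: dev=%d nvec=%d\n", dev, nvec);
        return 0;
}

/* Default ops; a platform could overwrite the pointer before devices probe. */
static struct msi_ops msi_ops = {
        .setup_msi_irqs = native_setup_msi_irqs,
};

/* The "arch hook" generic code calls; it only forwards. */
static int arch_setup_msi_irqs(int dev, int nvec)
{
        return msi_ops.setup_msi_irqs(dev, nvec);
}

int main(void)
{
        return arch_setup_msi_irqs(1, 2);
}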
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..b110fe6c03d4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
413 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 413 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
414 | (1 << KVM_FEATURE_ASYNC_PF) | | 414 | (1 << KVM_FEATURE_ASYNC_PF) | |
415 | (1 << KVM_FEATURE_PV_EOI) | | 415 | (1 << KVM_FEATURE_PV_EOI) | |
416 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 416 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | |
417 | (1 << KVM_FEATURE_PV_UNHALT); | ||
417 | 418 | ||
418 | if (sched_info_on()) | 419 | if (sched_info_on()) |
419 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | 420 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afc11245827c..5439117d5c4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -79,16 +79,6 @@ static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | |||
79 | *((u32 *) (apic->regs + reg_off)) = val; | 79 | *((u32 *) (apic->regs + reg_off)) = val; |
80 | } | 80 | } |
81 | 81 | ||
82 | static inline int apic_test_and_set_vector(int vec, void *bitmap) | ||
83 | { | ||
84 | return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
85 | } | ||
86 | |||
87 | static inline int apic_test_and_clear_vector(int vec, void *bitmap) | ||
88 | { | ||
89 | return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
90 | } | ||
91 | |||
92 | static inline int apic_test_vector(int vec, void *bitmap) | 82 | static inline int apic_test_vector(int vec, void *bitmap) |
93 | { | 83 | { |
94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 84 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -331,10 +321,10 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | |||
331 | } | 321 | } |
332 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 322 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
333 | 323 | ||
334 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 324 | static inline void apic_set_irr(int vec, struct kvm_lapic *apic) |
335 | { | 325 | { |
336 | apic->irr_pending = true; | 326 | apic->irr_pending = true; |
337 | return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); | 327 | apic_set_vector(vec, apic->regs + APIC_IRR); |
338 | } | 328 | } |
339 | 329 | ||
340 | static inline int apic_search_irr(struct kvm_lapic *apic) | 330 | static inline int apic_search_irr(struct kvm_lapic *apic) |
@@ -681,32 +671,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
681 | if (unlikely(!apic_enabled(apic))) | 671 | if (unlikely(!apic_enabled(apic))) |
682 | break; | 672 | break; |
683 | 673 | ||
674 | result = 1; | ||
675 | |||
684 | if (dest_map) | 676 | if (dest_map) |
685 | __set_bit(vcpu->vcpu_id, dest_map); | 677 | __set_bit(vcpu->vcpu_id, dest_map); |
686 | 678 | ||
687 | if (kvm_x86_ops->deliver_posted_interrupt) { | 679 | if (kvm_x86_ops->deliver_posted_interrupt) |
688 | result = 1; | ||
689 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); | 680 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
690 | } else { | 681 | else { |
691 | result = !apic_test_and_set_irr(vector, apic); | 682 | apic_set_irr(vector, apic); |
692 | |||
693 | if (!result) { | ||
694 | if (trig_mode) | ||
695 | apic_debug("level trig mode repeatedly " | ||
696 | "for vector %d", vector); | ||
697 | goto out; | ||
698 | } | ||
699 | 683 | ||
700 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 684 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
701 | kvm_vcpu_kick(vcpu); | 685 | kvm_vcpu_kick(vcpu); |
702 | } | 686 | } |
703 | out: | ||
704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 687 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, |
705 | trig_mode, vector, !result); | 688 | trig_mode, vector, false); |
706 | break; | 689 | break; |
707 | 690 | ||
708 | case APIC_DM_REMRD: | 691 | case APIC_DM_REMRD: |
709 | apic_debug("Ignoring delivery mode 3\n"); | 692 | result = 1; |
693 | vcpu->arch.pv.pv_unhalted = 1; | ||
694 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
695 | kvm_vcpu_kick(vcpu); | ||
710 | break; | 696 | break; |
711 | 697 | ||
712 | case APIC_DM_SMI: | 698 | case APIC_DM_SMI: |
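apic_set_irr() now sets the IRR bit unconditionally instead of doing a test-and-set. The IRR itself is 256 bits spread over eight 32-bit APIC registers spaced 0x10 apart, which is what VEC_POS()/REG_POS() encode; their definitions in the sketch below are assumed from lapic.c rather than shown in this hunk:

#include <stdio.h>

#define VEC_POS(v) ((v) & (32 - 1))      /* bit index inside one 32-bit register       */
#define REG_POS(v) (((v) >> 5) << 4)     /* byte offset of that register (0x10 apart)  */

int main(void)
{
        unsigned int vec = 0x31;

        printf("vector 0x%x -> IRR register offset 0x%x, bit %u\n",
               vec, REG_POS(vec), VEC_POS(vec));
        return 0;
}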
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9e9285ae9b94..6e2d2c8f230b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -132,8 +132,8 @@ module_param(dbg, bool, 0644); | |||
132 | (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 132 | (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
133 | * PT32_LEVEL_BITS))) - 1)) | 133 | * PT32_LEVEL_BITS))) - 1)) |
134 | 134 | ||
135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ |
136 | | PT64_NX_MASK) | 136 | | shadow_x_mask | shadow_nx_mask) |
137 | 137 | ||
138 | #define ACC_EXEC_MASK 1 | 138 | #define ACC_EXEC_MASK 1 |
139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK | 139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK |
@@ -331,11 +331,6 @@ static int is_large_pte(u64 pte) | |||
331 | return pte & PT_PAGE_SIZE_MASK; | 331 | return pte & PT_PAGE_SIZE_MASK; |
332 | } | 332 | } |
333 | 333 | ||
334 | static int is_dirty_gpte(unsigned long pte) | ||
335 | { | ||
336 | return pte & PT_DIRTY_MASK; | ||
337 | } | ||
338 | |||
339 | static int is_rmap_spte(u64 pte) | 334 | static int is_rmap_spte(u64 pte) |
340 | { | 335 | { |
341 | return is_shadow_present_pte(pte); | 336 | return is_shadow_present_pte(pte); |
@@ -2052,12 +2047,18 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | |||
2052 | return __shadow_walk_next(iterator, *iterator->sptep); | 2047 | return __shadow_walk_next(iterator, *iterator->sptep); |
2053 | } | 2048 | } |
2054 | 2049 | ||
2055 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | 2050 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) |
2056 | { | 2051 | { |
2057 | u64 spte; | 2052 | u64 spte; |
2058 | 2053 | ||
2054 | BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK || | ||
2055 | VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); | ||
2056 | |||
2059 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | | 2057 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
2060 | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; | 2058 | shadow_user_mask | shadow_x_mask; |
2059 | |||
2060 | if (accessed) | ||
2061 | spte |= shadow_accessed_mask; | ||
2061 | 2062 | ||
2062 | mmu_spte_set(sptep, spte); | 2063 | mmu_spte_set(sptep, spte); |
2063 | } | 2064 | } |
@@ -2574,14 +2575,6 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2574 | mmu_free_roots(vcpu); | 2575 | mmu_free_roots(vcpu); |
2575 | } | 2576 | } |
2576 | 2577 | ||
2577 | static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) | ||
2578 | { | ||
2579 | int bit7; | ||
2580 | |||
2581 | bit7 = (gpte >> 7) & 1; | ||
2582 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; | ||
2583 | } | ||
2584 | |||
2585 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2578 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
2586 | bool no_dirty_log) | 2579 | bool no_dirty_log) |
2587 | { | 2580 | { |
@@ -2594,26 +2587,6 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2594 | return gfn_to_pfn_memslot_atomic(slot, gfn); | 2587 | return gfn_to_pfn_memslot_atomic(slot, gfn); |
2595 | } | 2588 | } |
2596 | 2589 | ||
2597 | static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu, | ||
2598 | struct kvm_mmu_page *sp, u64 *spte, | ||
2599 | u64 gpte) | ||
2600 | { | ||
2601 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
2602 | goto no_present; | ||
2603 | |||
2604 | if (!is_present_gpte(gpte)) | ||
2605 | goto no_present; | ||
2606 | |||
2607 | if (!(gpte & PT_ACCESSED_MASK)) | ||
2608 | goto no_present; | ||
2609 | |||
2610 | return false; | ||
2611 | |||
2612 | no_present: | ||
2613 | drop_spte(vcpu->kvm, spte); | ||
2614 | return true; | ||
2615 | } | ||
2616 | |||
2617 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | 2590 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, |
2618 | struct kvm_mmu_page *sp, | 2591 | struct kvm_mmu_page *sp, |
2619 | u64 *start, u64 *end) | 2592 | u64 *start, u64 *end) |
@@ -2710,7 +2683,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2710 | iterator.level - 1, | 2683 | iterator.level - 1, |
2711 | 1, ACC_ALL, iterator.sptep); | 2684 | 1, ACC_ALL, iterator.sptep); |
2712 | 2685 | ||
2713 | link_shadow_page(iterator.sptep, sp); | 2686 | link_shadow_page(iterator.sptep, sp, true); |
2714 | } | 2687 | } |
2715 | } | 2688 | } |
2716 | return emulate; | 2689 | return emulate; |
@@ -2808,7 +2781,7 @@ exit: | |||
2808 | return ret; | 2781 | return ret; |
2809 | } | 2782 | } |
2810 | 2783 | ||
2811 | static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) | 2784 | static bool page_fault_can_be_fast(u32 error_code) |
2812 | { | 2785 | { |
2813 | /* | 2786 | /* |
2814 | * Do not fix the mmio spte with invalid generation number which | 2787 | * Do not fix the mmio spte with invalid generation number which |
@@ -2861,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2861 | bool ret = false; | 2834 | bool ret = false; |
2862 | u64 spte = 0ull; | 2835 | u64 spte = 0ull; |
2863 | 2836 | ||
2864 | if (!page_fault_can_be_fast(vcpu, error_code)) | 2837 | if (!page_fault_can_be_fast(error_code)) |
2865 | return false; | 2838 | return false; |
2866 | 2839 | ||
2867 | walk_shadow_page_lockless_begin(vcpu); | 2840 | walk_shadow_page_lockless_begin(vcpu); |
@@ -3209,6 +3182,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
3209 | mmu_sync_roots(vcpu); | 3182 | mmu_sync_roots(vcpu); |
3210 | spin_unlock(&vcpu->kvm->mmu_lock); | 3183 | spin_unlock(&vcpu->kvm->mmu_lock); |
3211 | } | 3184 | } |
3185 | EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); | ||
3212 | 3186 | ||
3213 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | 3187 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
3214 | u32 access, struct x86_exception *exception) | 3188 | u32 access, struct x86_exception *exception) |
@@ -3478,6 +3452,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
3478 | ++vcpu->stat.tlb_flush; | 3452 | ++vcpu->stat.tlb_flush; |
3479 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 3453 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
3480 | } | 3454 | } |
3455 | EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); | ||
3481 | 3456 | ||
3482 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 3457 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
3483 | { | 3458 | { |
@@ -3501,18 +3476,6 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
3501 | nonpaging_free(vcpu); | 3476 | nonpaging_free(vcpu); |
3502 | } | 3477 | } |
3503 | 3478 | ||
3504 | static inline void protect_clean_gpte(unsigned *access, unsigned gpte) | ||
3505 | { | ||
3506 | unsigned mask; | ||
3507 | |||
3508 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
3509 | |||
3510 | mask = (unsigned)~ACC_WRITE_MASK; | ||
3511 | /* Allow write access to dirty gptes */ | ||
3512 | mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; | ||
3513 | *access &= mask; | ||
3514 | } | ||
3515 | |||
3516 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | 3479 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, |
3517 | unsigned access, int *nr_present) | 3480 | unsigned access, int *nr_present) |
3518 | { | 3481 | { |
@@ -3530,16 +3493,6 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | |||
3530 | return false; | 3493 | return false; |
3531 | } | 3494 | } |
3532 | 3495 | ||
3533 | static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) | ||
3534 | { | ||
3535 | unsigned access; | ||
3536 | |||
3537 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
3538 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
3539 | |||
3540 | return access; | ||
3541 | } | ||
3542 | |||
3543 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) | 3496 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) |
3544 | { | 3497 | { |
3545 | unsigned index; | 3498 | unsigned index; |
@@ -3549,6 +3502,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp | |||
3549 | return mmu->last_pte_bitmap & (1 << index); | 3502 | return mmu->last_pte_bitmap & (1 << index); |
3550 | } | 3503 | } |
3551 | 3504 | ||
3505 | #define PTTYPE_EPT 18 /* arbitrary */ | ||
3506 | #define PTTYPE PTTYPE_EPT | ||
3507 | #include "paging_tmpl.h" | ||
3508 | #undef PTTYPE | ||
3509 | |||
3552 | #define PTTYPE 64 | 3510 | #define PTTYPE 64 |
3553 | #include "paging_tmpl.h" | 3511 | #include "paging_tmpl.h" |
3554 | #undef PTTYPE | 3512 | #undef PTTYPE |
@@ -3563,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3563 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 3521 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
3564 | u64 exb_bit_rsvd = 0; | 3522 | u64 exb_bit_rsvd = 0; |
3565 | 3523 | ||
3524 | context->bad_mt_xwr = 0; | ||
3525 | |||
3566 | if (!context->nx) | 3526 | if (!context->nx) |
3567 | exb_bit_rsvd = rsvd_bits(63, 63); | 3527 | exb_bit_rsvd = rsvd_bits(63, 63); |
3568 | switch (context->root_level) { | 3528 | switch (context->root_level) { |
@@ -3618,7 +3578,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3618 | } | 3578 | } |
3619 | } | 3579 | } |
3620 | 3580 | ||
3621 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | 3581 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, |
3582 | struct kvm_mmu *context, bool execonly) | ||
3583 | { | ||
3584 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
3585 | int pte; | ||
3586 | |||
3587 | context->rsvd_bits_mask[0][3] = | ||
3588 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | ||
3589 | context->rsvd_bits_mask[0][2] = | ||
3590 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); | ||
3591 | context->rsvd_bits_mask[0][1] = | ||
3592 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); | ||
3593 | context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); | ||
3594 | |||
3595 | /* large page */ | ||
3596 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; | ||
3597 | context->rsvd_bits_mask[1][2] = | ||
3598 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); | ||
3599 | context->rsvd_bits_mask[1][1] = | ||
3600 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); | ||
3601 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
3602 | |||
3603 | for (pte = 0; pte < 64; pte++) { | ||
3604 | int rwx_bits = pte & 7; | ||
3605 | int mt = pte >> 3; | ||
3606 | if (mt == 0x2 || mt == 0x3 || mt == 0x7 || | ||
3607 | rwx_bits == 0x2 || rwx_bits == 0x6 || | ||
3608 | (rwx_bits == 0x4 && !execonly)) | ||
3609 | context->bad_mt_xwr |= (1ull << pte); | ||
3610 | } | ||
3611 | } | ||
3612 | |||
3613 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, | ||
3614 | struct kvm_mmu *mmu, bool ept) | ||
3622 | { | 3615 | { |
3623 | unsigned bit, byte, pfec; | 3616 | unsigned bit, byte, pfec; |
3624 | u8 map; | 3617 | u8 map; |
@@ -3636,12 +3629,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu | |||
3636 | w = bit & ACC_WRITE_MASK; | 3629 | w = bit & ACC_WRITE_MASK; |
3637 | u = bit & ACC_USER_MASK; | 3630 | u = bit & ACC_USER_MASK; |
3638 | 3631 | ||
3639 | /* Not really needed: !nx will cause pte.nx to fault */ | 3632 | if (!ept) { |
3640 | x |= !mmu->nx; | 3633 | /* Not really needed: !nx will cause pte.nx to fault */ |
3641 | /* Allow supervisor writes if !cr0.wp */ | 3634 | x |= !mmu->nx; |
3642 | w |= !is_write_protection(vcpu) && !uf; | 3635 | /* Allow supervisor writes if !cr0.wp */ |
3643 | /* Disallow supervisor fetches of user code if cr4.smep */ | 3636 | w |= !is_write_protection(vcpu) && !uf; |
3644 | x &= !(smep && u && !uf); | 3637 | /* Disallow supervisor fetches of user code if cr4.smep */ |
3638 | x &= !(smep && u && !uf); | ||
3639 | } else | ||
3640 | /* Not really needed: no U/S accesses on ept */ | ||
3641 | u = 1; | ||
3645 | 3642 | ||
3646 | fault = (ff && !x) || (uf && !u) || (wf && !w); | 3643 | fault = (ff && !x) || (uf && !u) || (wf && !w); |
3647 | map |= fault << bit; | 3644 | map |= fault << bit; |
@@ -3676,7 +3673,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3676 | context->root_level = level; | 3673 | context->root_level = level; |
3677 | 3674 | ||
3678 | reset_rsvds_bits_mask(vcpu, context); | 3675 | reset_rsvds_bits_mask(vcpu, context); |
3679 | update_permission_bitmask(vcpu, context); | 3676 | update_permission_bitmask(vcpu, context, false); |
3680 | update_last_pte_bitmap(vcpu, context); | 3677 | update_last_pte_bitmap(vcpu, context); |
3681 | 3678 | ||
3682 | ASSERT(is_pae(vcpu)); | 3679 | ASSERT(is_pae(vcpu)); |
@@ -3706,7 +3703,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3706 | context->root_level = PT32_ROOT_LEVEL; | 3703 | context->root_level = PT32_ROOT_LEVEL; |
3707 | 3704 | ||
3708 | reset_rsvds_bits_mask(vcpu, context); | 3705 | reset_rsvds_bits_mask(vcpu, context); |
3709 | update_permission_bitmask(vcpu, context); | 3706 | update_permission_bitmask(vcpu, context, false); |
3710 | update_last_pte_bitmap(vcpu, context); | 3707 | update_last_pte_bitmap(vcpu, context); |
3711 | 3708 | ||
3712 | context->new_cr3 = paging_new_cr3; | 3709 | context->new_cr3 = paging_new_cr3; |
@@ -3768,7 +3765,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3768 | context->gva_to_gpa = paging32_gva_to_gpa; | 3765 | context->gva_to_gpa = paging32_gva_to_gpa; |
3769 | } | 3766 | } |
3770 | 3767 | ||
3771 | update_permission_bitmask(vcpu, context); | 3768 | update_permission_bitmask(vcpu, context, false); |
3772 | update_last_pte_bitmap(vcpu, context); | 3769 | update_last_pte_bitmap(vcpu, context); |
3773 | 3770 | ||
3774 | return 0; | 3771 | return 0; |
@@ -3800,6 +3797,33 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3800 | } | 3797 | } |
3801 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | 3798 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); |
3802 | 3799 | ||
3800 | int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | ||
3801 | bool execonly) | ||
3802 | { | ||
3803 | ASSERT(vcpu); | ||
3804 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
3805 | |||
3806 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | ||
3807 | |||
3808 | context->nx = true; | ||
3809 | context->new_cr3 = paging_new_cr3; | ||
3810 | context->page_fault = ept_page_fault; | ||
3811 | context->gva_to_gpa = ept_gva_to_gpa; | ||
3812 | context->sync_page = ept_sync_page; | ||
3813 | context->invlpg = ept_invlpg; | ||
3814 | context->update_pte = ept_update_pte; | ||
3815 | context->free = paging_free; | ||
3816 | context->root_level = context->shadow_root_level; | ||
3817 | context->root_hpa = INVALID_PAGE; | ||
3818 | context->direct_map = false; | ||
3819 | |||
3820 | update_permission_bitmask(vcpu, context, true); | ||
3821 | reset_rsvds_bits_mask_ept(vcpu, context, execonly); | ||
3822 | |||
3823 | return 0; | ||
3824 | } | ||
3825 | EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); | ||
3826 | |||
3803 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | 3827 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) |
3804 | { | 3828 | { |
3805 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); | 3829 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); |
@@ -3847,7 +3871,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3847 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3871 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3848 | } | 3872 | } |
3849 | 3873 | ||
3850 | update_permission_bitmask(vcpu, g_context); | 3874 | update_permission_bitmask(vcpu, g_context, false); |
3851 | update_last_pte_bitmap(vcpu, g_context); | 3875 | update_last_pte_bitmap(vcpu, g_context); |
3852 | 3876 | ||
3853 | return 0; | 3877 | return 0; |
@@ -3923,8 +3947,8 @@ static bool need_remote_flush(u64 old, u64 new) | |||
3923 | return true; | 3947 | return true; |
3924 | if ((old ^ new) & PT64_BASE_ADDR_MASK) | 3948 | if ((old ^ new) & PT64_BASE_ADDR_MASK) |
3925 | return true; | 3949 | return true; |
3926 | old ^= PT64_NX_MASK; | 3950 | old ^= shadow_nx_mask; |
3927 | new ^= PT64_NX_MASK; | 3951 | new ^= shadow_nx_mask; |
3928 | return (old & ~new & PT64_PERM_MASK) != 0; | 3952 | return (old & ~new & PT64_PERM_MASK) != 0; |
3929 | } | 3953 | } |
3930 | 3954 | ||
@@ -4182,7 +4206,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
4182 | switch (er) { | 4206 | switch (er) { |
4183 | case EMULATE_DONE: | 4207 | case EMULATE_DONE: |
4184 | return 1; | 4208 | return 1; |
4185 | case EMULATE_DO_MMIO: | 4209 | case EMULATE_USER_EXIT: |
4186 | ++vcpu->stat.mmio_exits; | 4210 | ++vcpu->stat.mmio_exits; |
4187 | /* fall through */ | 4211 | /* fall through */ |
4188 | case EMULATE_FAIL: | 4212 | case EMULATE_FAIL: |
@@ -4390,11 +4414,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | |||
4390 | /* | 4414 | /* |
4391 | * The very rare case: if the generation-number is round, | 4415 | * The very rare case: if the generation-number is round, |
4392 | * zap all shadow pages. | 4416 | * zap all shadow pages. |
4393 | * | ||
4394 | * The max value is MMIO_MAX_GEN - 1 since it is not called | ||
4395 | * when mark memslot invalid. | ||
4396 | */ | 4417 | */ |
4397 | if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) { | 4418 | if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { |
4398 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); | 4419 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); |
4399 | kvm_mmu_invalidate_zap_all_pages(kvm); | 4420 | kvm_mmu_invalidate_zap_all_pages(kvm); |
4400 | } | 4421 | } |
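reset_rsvds_bits_mask_ept() precomputes bad_mt_xwr: a 64-bit map indexed by the low six bits of an EPT PTE (XWR in bits 2:0, memory type in bits 5:3) marking combinations the page walker must treat as reserved-bit faults. A stand-alone sketch that reproduces that loop; the reading of the specific reserved values follows the hunk rather than being re-derived here:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t build_bad_mt_xwr(bool execonly)
{
        uint64_t bad = 0;
        int pte;

        for (pte = 0; pte < 64; pte++) {
                int rwx = pte & 7;       /* bits 2:0 - read/write/execute */
                int mt  = pte >> 3;      /* bits 5:3 - memory type        */

                if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||    /* reserved memory types         */
                    rwx == 0x2 || rwx == 0x6 ||               /* write permission without read */
                    (rwx == 0x4 && !execonly))                /* execute-only not allowed      */
                        bad |= 1ull << pte;
        }
        return bad;
}

int main(void)
{
        printf("execonly=0: %#llx\n", (unsigned long long)build_bad_mt_xwr(false));
        printf("execonly=1: %#llx\n", (unsigned long long)build_bad_mt_xwr(true));
        return 0;
}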
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 5b59c573aba7..77e044a0f5f7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -71,6 +71,8 @@ enum { | |||
71 | 71 | ||
72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
74 | int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | ||
75 | bool execonly); | ||
74 | 76 | ||
75 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 77 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
76 | { | 78 | { |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7769699d48a8..043330159179 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -23,6 +23,13 @@ | |||
23 | * so the code in this file is compiled twice, once per pte size. | 23 | * so the code in this file is compiled twice, once per pte size. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | /* | ||
27 | * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro | ||
28 | * uses for EPT without A/D paging type. | ||
29 | */ | ||
30 | extern u64 __pure __using_nonexistent_pte_bit(void) | ||
31 | __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT"); | ||
32 | |||
26 | #if PTTYPE == 64 | 33 | #if PTTYPE == 64 |
27 | #define pt_element_t u64 | 34 | #define pt_element_t u64 |
28 | #define guest_walker guest_walker64 | 35 | #define guest_walker guest_walker64 |
@@ -32,6 +39,10 @@ | |||
32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 39 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 40 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 41 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
42 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
43 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
44 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | ||
45 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | ||
35 | #ifdef CONFIG_X86_64 | 46 | #ifdef CONFIG_X86_64 |
36 | #define PT_MAX_FULL_LEVELS 4 | 47 | #define PT_MAX_FULL_LEVELS 4 |
37 | #define CMPXCHG cmpxchg | 48 | #define CMPXCHG cmpxchg |
@@ -49,7 +60,26 @@ | |||
49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 60 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
50 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 61 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
51 | #define PT_MAX_FULL_LEVELS 2 | 62 | #define PT_MAX_FULL_LEVELS 2 |
63 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
64 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
65 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | ||
66 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | ||
52 | #define CMPXCHG cmpxchg | 67 | #define CMPXCHG cmpxchg |
68 | #elif PTTYPE == PTTYPE_EPT | ||
69 | #define pt_element_t u64 | ||
70 | #define guest_walker guest_walkerEPT | ||
71 | #define FNAME(name) ept_##name | ||
72 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK | ||
73 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) | ||
74 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | ||
75 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
76 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | ||
77 | #define PT_GUEST_ACCESSED_MASK 0 | ||
78 | #define PT_GUEST_DIRTY_MASK 0 | ||
79 | #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() | ||
80 | #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit() | ||
81 | #define CMPXCHG cmpxchg64 | ||
82 | #define PT_MAX_FULL_LEVELS 4 | ||
53 | #else | 83 | #else |
54 | #error Invalid PTTYPE value | 84 | #error Invalid PTTYPE value |
55 | #endif | 85 | #endif |
@@ -80,6 +110,40 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | |||
80 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; | 110 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; |
81 | } | 111 | } |
82 | 112 | ||
113 | static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) | ||
114 | { | ||
115 | unsigned mask; | ||
116 | |||
117 | /* dirty bit is not supported, so no need to track it */ | ||
118 | if (!PT_GUEST_DIRTY_MASK) | ||
119 | return; | ||
120 | |||
121 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
122 | |||
123 | mask = (unsigned)~ACC_WRITE_MASK; | ||
124 | /* Allow write access to dirty gptes */ | ||
125 | mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & | ||
126 | PT_WRITABLE_MASK; | ||
127 | *access &= mask; | ||
128 | } | ||
129 | |||
130 | static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) | ||
131 | { | ||
132 | int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f; | ||
133 | |||
134 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) | | ||
135 | ((mmu->bad_mt_xwr & (1ull << low6)) != 0); | ||
136 | } | ||
137 | |||
138 | static inline int FNAME(is_present_gpte)(unsigned long pte) | ||
139 | { | ||
140 | #if PTTYPE != PTTYPE_EPT | ||
141 | return is_present_gpte(pte); | ||
142 | #else | ||
143 | return pte & 7; | ||
144 | #endif | ||
145 | } | ||
146 | |||
83 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 147 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
84 | pt_element_t __user *ptep_user, unsigned index, | 148 | pt_element_t __user *ptep_user, unsigned index, |
85 | pt_element_t orig_pte, pt_element_t new_pte) | 149 | pt_element_t orig_pte, pt_element_t new_pte) |
@@ -103,6 +167,42 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
103 | return (ret != orig_pte); | 167 | return (ret != orig_pte); |
104 | } | 168 | } |
105 | 169 | ||
170 | static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | ||
171 | struct kvm_mmu_page *sp, u64 *spte, | ||
172 | u64 gpte) | ||
173 | { | ||
174 | if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
175 | goto no_present; | ||
176 | |||
177 | if (!FNAME(is_present_gpte)(gpte)) | ||
178 | goto no_present; | ||
179 | |||
180 | /* if accessed bit is not supported prefetch non accessed gpte */ | ||
181 | if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) | ||
182 | goto no_present; | ||
183 | |||
184 | return false; | ||
185 | |||
186 | no_present: | ||
187 | drop_spte(vcpu->kvm, spte); | ||
188 | return true; | ||
189 | } | ||
190 | |||
191 | static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) | ||
192 | { | ||
193 | unsigned access; | ||
194 | #if PTTYPE == PTTYPE_EPT | ||
195 | access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | | ||
196 | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | | ||
197 | ACC_USER_MASK; | ||
198 | #else | ||
199 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
200 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
201 | #endif | ||
202 | |||
203 | return access; | ||
204 | } | ||
205 | |||
106 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | 206 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, |
107 | struct kvm_mmu *mmu, | 207 | struct kvm_mmu *mmu, |
108 | struct guest_walker *walker, | 208 | struct guest_walker *walker, |
@@ -114,18 +214,23 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | |||
114 | gfn_t table_gfn; | 214 | gfn_t table_gfn; |
115 | int ret; | 215 | int ret; |
116 | 216 | ||
217 | /* dirty/accessed bits are not supported, so no need to update them */ | ||
218 | if (!PT_GUEST_DIRTY_MASK) | ||
219 | return 0; | ||
220 | |||
117 | for (level = walker->max_level; level >= walker->level; --level) { | 221 | for (level = walker->max_level; level >= walker->level; --level) { |
118 | pte = orig_pte = walker->ptes[level - 1]; | 222 | pte = orig_pte = walker->ptes[level - 1]; |
119 | table_gfn = walker->table_gfn[level - 1]; | 223 | table_gfn = walker->table_gfn[level - 1]; |
120 | ptep_user = walker->ptep_user[level - 1]; | 224 | ptep_user = walker->ptep_user[level - 1]; |
121 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); | 225 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); |
122 | if (!(pte & PT_ACCESSED_MASK)) { | 226 | if (!(pte & PT_GUEST_ACCESSED_MASK)) { |
123 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); | 227 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); |
124 | pte |= PT_ACCESSED_MASK; | 228 | pte |= PT_GUEST_ACCESSED_MASK; |
125 | } | 229 | } |
126 | if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { | 230 | if (level == walker->level && write_fault && |
231 | !(pte & PT_GUEST_DIRTY_MASK)) { | ||
127 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
128 | pte |= PT_DIRTY_MASK; | 233 | pte |= PT_GUEST_DIRTY_MASK; |
129 | } | 234 | } |
130 | if (pte == orig_pte) | 235 | if (pte == orig_pte) |
131 | continue; | 236 | continue; |
@@ -170,7 +275,7 @@ retry_walk: | |||
170 | if (walker->level == PT32E_ROOT_LEVEL) { | 275 | if (walker->level == PT32E_ROOT_LEVEL) { |
171 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); | 276 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); |
172 | trace_kvm_mmu_paging_element(pte, walker->level); | 277 | trace_kvm_mmu_paging_element(pte, walker->level); |
173 | if (!is_present_gpte(pte)) | 278 | if (!FNAME(is_present_gpte)(pte)) |
174 | goto error; | 279 | goto error; |
175 | --walker->level; | 280 | --walker->level; |
176 | } | 281 | } |
@@ -179,7 +284,7 @@ retry_walk: | |||
179 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 284 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
180 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); | 285 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); |
181 | 286 | ||
182 | accessed_dirty = PT_ACCESSED_MASK; | 287 | accessed_dirty = PT_GUEST_ACCESSED_MASK; |
183 | pt_access = pte_access = ACC_ALL; | 288 | pt_access = pte_access = ACC_ALL; |
184 | ++walker->level; | 289 | ++walker->level; |
185 | 290 | ||
@@ -215,17 +320,17 @@ retry_walk: | |||
215 | 320 | ||
216 | trace_kvm_mmu_paging_element(pte, walker->level); | 321 | trace_kvm_mmu_paging_element(pte, walker->level); |
217 | 322 | ||
218 | if (unlikely(!is_present_gpte(pte))) | 323 | if (unlikely(!FNAME(is_present_gpte)(pte))) |
219 | goto error; | 324 | goto error; |
220 | 325 | ||
221 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, | 326 | if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, |
222 | walker->level))) { | 327 | walker->level))) { |
223 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; | 328 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; |
224 | goto error; | 329 | goto error; |
225 | } | 330 | } |
226 | 331 | ||
227 | accessed_dirty &= pte; | 332 | accessed_dirty &= pte; |
228 | pte_access = pt_access & gpte_access(vcpu, pte); | 333 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); |
229 | 334 | ||
230 | walker->ptes[walker->level - 1] = pte; | 335 | walker->ptes[walker->level - 1] = pte; |
231 | } while (!is_last_gpte(mmu, walker->level, pte)); | 336 | } while (!is_last_gpte(mmu, walker->level, pte)); |
@@ -248,13 +353,15 @@ retry_walk: | |||
248 | walker->gfn = real_gpa >> PAGE_SHIFT; | 353 | walker->gfn = real_gpa >> PAGE_SHIFT; |
249 | 354 | ||
250 | if (!write_fault) | 355 | if (!write_fault) |
251 | protect_clean_gpte(&pte_access, pte); | 356 | FNAME(protect_clean_gpte)(&pte_access, pte); |
252 | else | 357 | else |
253 | /* | 358 | /* |
254 | * On a write fault, fold the dirty bit into accessed_dirty by | 359 | * On a write fault, fold the dirty bit into accessed_dirty. |
255 | * shifting it one place right. | 360 | * For modes without A/D bits, accessed_dirty will |
361 | * always be clear. | ||
256 | */ | 362 | */ |
257 | accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); | 363 | accessed_dirty &= pte >> |
364 | (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); | ||
258 | 365 | ||
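The shift above folds the guest dirty bit down onto the accessed-bit position, so a single accessed_dirty word can answer whether the slow A/D update path is needed; for formats whose PT_GUEST_* masks are zero the word starts out clear and the update is skipped entirely. A toy model of that bookkeeping, assuming the IA-32 bit positions (A at bit 5, D at bit 6):

#include <stdint.h>
#include <stdio.h>

#define PT_GUEST_ACCESSED_SHIFT 5
#define PT_GUEST_DIRTY_SHIFT    6
#define PT_GUEST_ACCESSED_MASK  (1ull << PT_GUEST_ACCESSED_SHIFT)

int main(void)
{
        uint64_t accessed_dirty = PT_GUEST_ACCESSED_MASK;
        uint64_t pte = 1ull << PT_GUEST_ACCESSED_SHIFT;   /* A set, D clear */
        int write_fault = 1;

        /* Each walk level clears the bit if a gpte on the path lacks A. */
        accessed_dirty &= pte;

        if (write_fault)
                /*
                 * Fold D onto the A position: a clear dirty bit zeroes
                 * accessed_dirty and forces the slow update path.
                 */
                accessed_dirty &= pte >>
                        (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);

        printf("slow A/D update needed: %s\n", accessed_dirty ? "no" : "yes");
        return 0;
}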
259 | if (unlikely(!accessed_dirty)) { | 366 | if (unlikely(!accessed_dirty)) { |
260 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); | 367 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
@@ -279,6 +386,25 @@ error: | |||
279 | walker->fault.vector = PF_VECTOR; | 386 | walker->fault.vector = PF_VECTOR; |
280 | walker->fault.error_code_valid = true; | 387 | walker->fault.error_code_valid = true; |
281 | walker->fault.error_code = errcode; | 388 | walker->fault.error_code = errcode; |
389 | |||
390 | #if PTTYPE == PTTYPE_EPT | ||
391 | /* | ||
392 | * Use PFERR_RSVD_MASK in error_code to tell if an EPT | ||
393 | * misconfiguration needs to be injected. The detection is | ||
394 | * done by is_rsvd_bits_set() above. | ||
395 | * | ||
396 | * We set up the value of exit_qualification to inject: | ||
397 | * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation | ||
398 | * [5:3] - Calculated by the page walk of the guest EPT page tables | ||
399 | * [8:7] - Derived from [8:7] of real exit_qualification | ||
400 | * | ||
401 | * The other bits are set to 0. | ||
402 | */ | ||
403 | if (!(errcode & PFERR_RSVD_MASK)) { | ||
404 | vcpu->arch.exit_qualification &= 0x187; | ||
405 | vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3; | ||
406 | } | ||
407 | #endif | ||
282 | walker->fault.address = addr; | 408 | walker->fault.address = addr; |
283 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; | 409 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; |
284 | 410 | ||
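The new error-path code above synthesizes the exit qualification to inject into L1 for a nested EPT violation: bits [2:0] and [8:7] are preserved from the real exit, and bits [5:3] are filled with the permissions the guest EPT walk actually granted. A standalone sketch of just that computation (the helper name is invented, all KVM plumbing elided):

#include <stdint.h>
#include <stdio.h>

static uint64_t nested_ept_exit_qual(uint64_t real_qual, uint64_t pt_access,
                                     uint64_t pte)
{
        uint64_t qual;

        /* Keep bits [2:0] (access type) and [8:7] from the real exit. */
        qual = real_qual & 0x187;
        /* Bits [5:3]: permissions the guest EPT walk actually allows. */
        qual |= ((pt_access & pte) & 0x7) << 3;
        return qual;
}

int main(void)
{
        /* A write (bit 1) that hit a read-only guest EPT mapping. */
        printf("injected exit qualification: %#llx\n",
               (unsigned long long)nested_ept_exit_qual(0x2, 0x7, 0x1));
        return 0;
}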
@@ -293,6 +419,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
293 | access); | 419 | access); |
294 | } | 420 | } |
295 | 421 | ||
422 | #if PTTYPE != PTTYPE_EPT | ||
296 | static int FNAME(walk_addr_nested)(struct guest_walker *walker, | 423 | static int FNAME(walk_addr_nested)(struct guest_walker *walker, |
297 | struct kvm_vcpu *vcpu, gva_t addr, | 424 | struct kvm_vcpu *vcpu, gva_t addr, |
298 | u32 access) | 425 | u32 access) |
@@ -300,6 +427,7 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker, | |||
300 | return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, | 427 | return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, |
301 | addr, access); | 428 | addr, access); |
302 | } | 429 | } |
430 | #endif | ||
303 | 431 | ||
304 | static bool | 432 | static bool |
305 | FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 433 | FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
@@ -309,14 +437,14 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
309 | gfn_t gfn; | 437 | gfn_t gfn; |
310 | pfn_t pfn; | 438 | pfn_t pfn; |
311 | 439 | ||
312 | if (prefetch_invalid_gpte(vcpu, sp, spte, gpte)) | 440 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
313 | return false; | 441 | return false; |
314 | 442 | ||
315 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 443 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
316 | 444 | ||
317 | gfn = gpte_to_gfn(gpte); | 445 | gfn = gpte_to_gfn(gpte); |
318 | pte_access = sp->role.access & gpte_access(vcpu, gpte); | 446 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
319 | protect_clean_gpte(&pte_access, gpte); | 447 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
320 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 448 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
321 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); | 449 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); |
322 | if (is_error_pfn(pfn)) | 450 | if (is_error_pfn(pfn)) |
@@ -446,7 +574,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
446 | goto out_gpte_changed; | 574 | goto out_gpte_changed; |
447 | 575 | ||
448 | if (sp) | 576 | if (sp) |
449 | link_shadow_page(it.sptep, sp); | 577 | link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); |
450 | } | 578 | } |
451 | 579 | ||
452 | for (; | 580 | for (; |
@@ -466,7 +594,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
466 | 594 | ||
467 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | 595 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, |
468 | true, direct_access, it.sptep); | 596 | true, direct_access, it.sptep); |
469 | link_shadow_page(it.sptep, sp); | 597 | link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); |
470 | } | 598 | } |
471 | 599 | ||
472 | clear_sp_write_flooding_count(it.sptep); | 600 | clear_sp_write_flooding_count(it.sptep); |
@@ -727,6 +855,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | |||
727 | return gpa; | 855 | return gpa; |
728 | } | 856 | } |
729 | 857 | ||
858 | #if PTTYPE != PTTYPE_EPT | ||
730 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | 859 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, |
731 | u32 access, | 860 | u32 access, |
732 | struct x86_exception *exception) | 861 | struct x86_exception *exception) |
@@ -745,6 +874,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
745 | 874 | ||
746 | return gpa; | 875 | return gpa; |
747 | } | 876 | } |
877 | #endif | ||
748 | 878 | ||
749 | /* | 879 | /* |
750 | * Using the cached information from sp->gfns is safe because: | 880 | * Using the cached information from sp->gfns is safe because: |
@@ -785,15 +915,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
785 | sizeof(pt_element_t))) | 915 | sizeof(pt_element_t))) |
786 | return -EINVAL; | 916 | return -EINVAL; |
787 | 917 | ||
788 | if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) { | 918 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
789 | vcpu->kvm->tlbs_dirty++; | 919 | vcpu->kvm->tlbs_dirty++; |
790 | continue; | 920 | continue; |
791 | } | 921 | } |
792 | 922 | ||
793 | gfn = gpte_to_gfn(gpte); | 923 | gfn = gpte_to_gfn(gpte); |
794 | pte_access = sp->role.access; | 924 | pte_access = sp->role.access; |
795 | pte_access &= gpte_access(vcpu, gpte); | 925 | pte_access &= FNAME(gpte_access)(vcpu, gpte); |
796 | protect_clean_gpte(&pte_access, gpte); | 926 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
797 | 927 | ||
798 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, | 928 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, |
799 | &nr_present)) | 929 | &nr_present)) |
@@ -830,3 +960,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
830 | #undef gpte_to_gfn | 960 | #undef gpte_to_gfn |
831 | #undef gpte_to_gfn_lvl | 961 | #undef gpte_to_gfn_lvl |
832 | #undef CMPXCHG | 962 | #undef CMPXCHG |
963 | #undef PT_GUEST_ACCESSED_MASK | ||
964 | #undef PT_GUEST_DIRTY_MASK | ||
965 | #undef PT_GUEST_DIRTY_SHIFT | ||
966 | #undef PT_GUEST_ACCESSED_SHIFT | ||
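The PT_GUEST_* macros undefined here let paging_tmpl.h be instantiated both for formats with accessed/dirty bits (the existing 32-bit and 64-bit walkers) and for EPT, where this series defines the masks as zero so every A/D code path constant-folds away. A compilable illustration of that pattern; the helper function names below are invented for the example:

#include <stdio.h>

/* "Instantiation" with A/D bits: the guard is compiled in. */
#define PT_GUEST_DIRTY_MASK (1u << 6)
static int pt64_needs_ad_update(unsigned pte)
{
        if (!PT_GUEST_DIRTY_MASK)          /* constant false, folded away */
                return 0;
        return !(pte & PT_GUEST_DIRTY_MASK);
}
#undef PT_GUEST_DIRTY_MASK

/* "Instantiation" without A/D bits (EPT): the body becomes dead code. */
#define PT_GUEST_DIRTY_MASK 0
static int ept_needs_ad_update(unsigned pte)
{
        if (!PT_GUEST_DIRTY_MASK)          /* constant true, early return */
                return 0;
        return !(pte & PT_GUEST_DIRTY_MASK);
}
#undef PT_GUEST_DIRTY_MASK

int main(void)
{
        printf("pt64: %d  ept: %d\n",
               pt64_needs_ad_update(0), ept_needs_ad_update(0));
        return 0;
}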
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c53e797e7369..5c4f63151b4d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -160,7 +160,7 @@ static void stop_counter(struct kvm_pmc *pmc) | |||
160 | 160 | ||
161 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | 161 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, |
162 | unsigned config, bool exclude_user, bool exclude_kernel, | 162 | unsigned config, bool exclude_user, bool exclude_kernel, |
163 | bool intr) | 163 | bool intr, bool in_tx, bool in_tx_cp) |
164 | { | 164 | { |
165 | struct perf_event *event; | 165 | struct perf_event *event; |
166 | struct perf_event_attr attr = { | 166 | struct perf_event_attr attr = { |
@@ -173,6 +173,10 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | |||
173 | .exclude_kernel = exclude_kernel, | 173 | .exclude_kernel = exclude_kernel, |
174 | .config = config, | 174 | .config = config, |
175 | }; | 175 | }; |
176 | if (in_tx) | ||
177 | attr.config |= HSW_IN_TX; | ||
178 | if (in_tx_cp) | ||
179 | attr.config |= HSW_IN_TX_CHECKPOINTED; | ||
176 | 180 | ||
177 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); | 181 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); |
178 | 182 | ||
@@ -226,7 +230,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
226 | 230 | ||
227 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | | 231 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | |
228 | ARCH_PERFMON_EVENTSEL_INV | | 232 | ARCH_PERFMON_EVENTSEL_INV | |
229 | ARCH_PERFMON_EVENTSEL_CMASK))) { | 233 | ARCH_PERFMON_EVENTSEL_CMASK | |
234 | HSW_IN_TX | | ||
235 | HSW_IN_TX_CHECKPOINTED))) { | ||
230 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | 236 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, |
231 | unit_mask); | 237 | unit_mask); |
232 | if (config != PERF_COUNT_HW_MAX) | 238 | if (config != PERF_COUNT_HW_MAX) |
@@ -239,7 +245,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
239 | reprogram_counter(pmc, type, config, | 245 | reprogram_counter(pmc, type, config, |
240 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | 246 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), |
241 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | 247 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), |
242 | eventsel & ARCH_PERFMON_EVENTSEL_INT); | 248 | eventsel & ARCH_PERFMON_EVENTSEL_INT, |
249 | (eventsel & HSW_IN_TX), | ||
250 | (eventsel & HSW_IN_TX_CHECKPOINTED)); | ||
243 | } | 251 | } |
244 | 252 | ||
245 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | 253 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) |
@@ -256,7 +264,7 @@ static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | |||
256 | arch_events[fixed_pmc_events[idx]].event_type, | 264 | arch_events[fixed_pmc_events[idx]].event_type, |
257 | !(en & 0x2), /* exclude user */ | 265 | !(en & 0x2), /* exclude user */ |
258 | !(en & 0x1), /* exclude kernel */ | 266 | !(en & 0x1), /* exclude kernel */ |
259 | pmi); | 267 | pmi, false, false); |
260 | } | 268 | } |
261 | 269 | ||
262 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | 270 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) |
@@ -408,7 +416,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
408 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | 416 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { |
409 | if (data == pmc->eventsel) | 417 | if (data == pmc->eventsel) |
410 | return 0; | 418 | return 0; |
411 | if (!(data & 0xffffffff00200000ull)) { | 419 | if (!(data & pmu->reserved_bits)) { |
412 | reprogram_gp_counter(pmc, data); | 420 | reprogram_gp_counter(pmc, data); |
413 | return 0; | 421 | return 0; |
414 | } | 422 | } |
@@ -450,6 +458,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
450 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | 458 | pmu->counter_bitmask[KVM_PMC_GP] = 0; |
451 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | 459 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; |
452 | pmu->version = 0; | 460 | pmu->version = 0; |
461 | pmu->reserved_bits = 0xffffffff00200000ull; | ||
453 | 462 | ||
454 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | 463 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); |
455 | if (!entry) | 464 | if (!entry) |
@@ -478,6 +487,12 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
478 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 487 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
479 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); | 488 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); |
480 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 489 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
490 | |||
491 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
492 | if (entry && | ||
493 | (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && | ||
494 | (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) | ||
495 | pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; | ||
481 | } | 496 | } |
482 | 497 | ||
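The reserved_bits field added above replaces the hard-coded eventsel mask, and the CPUID.7 check XORs the two Haswell TSX qualifier bits out of it, so a guest that sees HLE or RTM may set them without taking the #GP path. A standalone sketch of that mask handling; HSW_IN_TX and HSW_IN_TX_CHECKPOINTED are assumed to be bits 32 and 33, matching perf's definitions, and guest_has_tsx stands in for the CPUID lookup:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HSW_IN_TX              (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)

int main(void)
{
        uint64_t reserved_bits = 0xffffffff00200000ull;  /* baseline mask */
        bool guest_has_tsx = true;                       /* stand-in for the CPUID.7 check */

        if (guest_has_tsx)
                /* XOR flips the two bits from "reserved" to "allowed". */
                reserved_bits ^= HSW_IN_TX | HSW_IN_TX_CHECKPOINTED;

        uint64_t eventsel = HSW_IN_TX | 0xc0;            /* IN_TX qualifier + event 0xc0 */

        printf("wrmsr to the event select is %s\n",
               (eventsel & reserved_bits) ? "rejected (#GP path)" : "accepted");
        return 0;
}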
483 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | 498 | void kvm_pmu_init(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 064d0be67ecc..1f1da43ff2a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -373,6 +373,7 @@ struct nested_vmx { | |||
373 | * we must keep them pinned while L2 runs. | 373 | * we must keep them pinned while L2 runs. |
374 | */ | 374 | */ |
375 | struct page *apic_access_page; | 375 | struct page *apic_access_page; |
376 | u64 msr_ia32_feature_control; | ||
376 | }; | 377 | }; |
377 | 378 | ||
378 | #define POSTED_INTR_ON 0 | 379 | #define POSTED_INTR_ON 0 |
@@ -711,10 +712,10 @@ static void nested_release_page_clean(struct page *page) | |||
711 | kvm_release_page_clean(page); | 712 | kvm_release_page_clean(page); |
712 | } | 713 | } |
713 | 714 | ||
715 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | ||
714 | static u64 construct_eptp(unsigned long root_hpa); | 716 | static u64 construct_eptp(unsigned long root_hpa); |
715 | static void kvm_cpu_vmxon(u64 addr); | 717 | static void kvm_cpu_vmxon(u64 addr); |
716 | static void kvm_cpu_vmxoff(void); | 718 | static void kvm_cpu_vmxoff(void); |
717 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
718 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 719 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
719 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 720 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
720 | struct kvm_segment *var, int seg); | 721 | struct kvm_segment *var, int seg); |
@@ -1039,12 +1040,16 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) | |||
1039 | (vmcs12->secondary_vm_exec_control & bit); | 1040 | (vmcs12->secondary_vm_exec_control & bit); |
1040 | } | 1041 | } |
1041 | 1042 | ||
1042 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12, | 1043 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) |
1043 | struct kvm_vcpu *vcpu) | ||
1044 | { | 1044 | { |
1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; | 1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; |
1046 | } | 1046 | } |
1047 | 1047 | ||
1048 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | ||
1049 | { | ||
1050 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); | ||
1051 | } | ||
1052 | |||
1048 | static inline bool is_exception(u32 intr_info) | 1053 | static inline bool is_exception(u32 intr_info) |
1049 | { | 1054 | { |
1050 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1055 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
@@ -2155,6 +2160,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | |||
2155 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2160 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
2156 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2161 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
2157 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | 2162 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; |
2163 | static u32 nested_vmx_ept_caps; | ||
2158 | static __init void nested_vmx_setup_ctls_msrs(void) | 2164 | static __init void nested_vmx_setup_ctls_msrs(void) |
2159 | { | 2165 | { |
2160 | /* | 2166 | /* |
@@ -2190,14 +2196,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2190 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | 2196 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and |
2191 | * 17 must be 1. | 2197 | * 17 must be 1. |
2192 | */ | 2198 | */ |
2199 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, | ||
2200 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); | ||
2193 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2201 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; |
2194 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | 2202 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ |
2203 | nested_vmx_exit_ctls_high &= | ||
2195 | #ifdef CONFIG_X86_64 | 2204 | #ifdef CONFIG_X86_64 |
2196 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2205 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
2197 | #else | ||
2198 | nested_vmx_exit_ctls_high = 0; | ||
2199 | #endif | 2206 | #endif |
2200 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2207 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; |
2208 | nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2209 | VM_EXIT_LOAD_IA32_EFER); | ||
2201 | 2210 | ||
2202 | /* entry controls */ | 2211 | /* entry controls */ |
2203 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2212 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2205,8 +2214,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2205 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ | 2214 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ |
2206 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2215 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; |
2207 | nested_vmx_entry_ctls_high &= | 2216 | nested_vmx_entry_ctls_high &= |
2208 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | 2217 | #ifdef CONFIG_X86_64 |
2209 | nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2218 | VM_ENTRY_IA32E_MODE | |
2219 | #endif | ||
2220 | VM_ENTRY_LOAD_IA32_PAT; | ||
2221 | nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2222 | VM_ENTRY_LOAD_IA32_EFER); | ||
2210 | 2223 | ||
2211 | /* cpu-based controls */ | 2224 | /* cpu-based controls */ |
2212 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2225 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
@@ -2241,6 +2254,22 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2241 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2254 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2242 | SECONDARY_EXEC_WBINVD_EXITING; | 2255 | SECONDARY_EXEC_WBINVD_EXITING; |
2243 | 2256 | ||
2257 | if (enable_ept) { | ||
2258 | /* nested EPT: expose EPT to L1 as well */ | ||
2259 | nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; | ||
2260 | nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | ||
2261 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; | ||
2262 | nested_vmx_ept_caps &= vmx_capability.ept; | ||
2263 | /* | ||
2264 | * Since invept is completely emulated, we support both global | ||
2265 | * and context invalidation regardless of what the host cpu | ||
2266 | * supports. | ||
2267 | */ | ||
2268 | nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | | ||
2269 | VMX_EPT_EXTENT_CONTEXT_BIT; | ||
2270 | } else | ||
2271 | nested_vmx_ept_caps = 0; | ||
2272 | |||
2244 | /* miscellaneous data */ | 2273 | /* miscellaneous data */ |
2245 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2274 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); |
2246 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | 2275 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | |
@@ -2282,8 +2311,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2282 | 2311 | ||
2283 | switch (msr_index) { | 2312 | switch (msr_index) { |
2284 | case MSR_IA32_FEATURE_CONTROL: | 2313 | case MSR_IA32_FEATURE_CONTROL: |
2285 | *pdata = 0; | 2314 | if (nested_vmx_allowed(vcpu)) { |
2286 | break; | 2315 | *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; |
2316 | break; | ||
2317 | } | ||
2318 | return 0; | ||
2287 | case MSR_IA32_VMX_BASIC: | 2319 | case MSR_IA32_VMX_BASIC: |
2288 | /* | 2320 | /* |
2289 | * This MSR reports some information about VMX support. We | 2321 | * This MSR reports some information about VMX support. We |
@@ -2346,8 +2378,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2346 | nested_vmx_secondary_ctls_high); | 2378 | nested_vmx_secondary_ctls_high); |
2347 | break; | 2379 | break; |
2348 | case MSR_IA32_VMX_EPT_VPID_CAP: | 2380 | case MSR_IA32_VMX_EPT_VPID_CAP: |
2349 | /* Currently, no nested ept or nested vpid */ | 2381 | /* Currently, no nested vpid support */ |
2350 | *pdata = 0; | 2382 | *pdata = nested_vmx_ept_caps; |
2351 | break; | 2383 | break; |
2352 | default: | 2384 | default: |
2353 | return 0; | 2385 | return 0; |
@@ -2356,14 +2388,24 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2356 | return 1; | 2388 | return 1; |
2357 | } | 2389 | } |
2358 | 2390 | ||
2359 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 2391 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2360 | { | 2392 | { |
2393 | u32 msr_index = msr_info->index; | ||
2394 | u64 data = msr_info->data; | ||
2395 | bool host_initialized = msr_info->host_initiated; | ||
2396 | |||
2361 | if (!nested_vmx_allowed(vcpu)) | 2397 | if (!nested_vmx_allowed(vcpu)) |
2362 | return 0; | 2398 | return 0; |
2363 | 2399 | ||
2364 | if (msr_index == MSR_IA32_FEATURE_CONTROL) | 2400 | if (msr_index == MSR_IA32_FEATURE_CONTROL) { |
2365 | /* TODO: the right thing. */ | 2401 | if (!host_initialized && |
2402 | to_vmx(vcpu)->nested.msr_ia32_feature_control | ||
2403 | & FEATURE_CONTROL_LOCKED) | ||
2404 | return 0; | ||
2405 | to_vmx(vcpu)->nested.msr_ia32_feature_control = data; | ||
2366 | return 1; | 2406 | return 1; |
2407 | } | ||
2408 | |||
2367 | /* | 2409 | /* |
2368 | * No need to treat VMX capability MSRs specially: If we don't handle | 2410 | * No need to treat VMX capability MSRs specially: If we don't handle |
2369 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | 2411 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) |
@@ -2494,7 +2536,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2494 | return 1; | 2536 | return 1; |
2495 | /* Otherwise falls through */ | 2537 | /* Otherwise falls through */ |
2496 | default: | 2538 | default: |
2497 | if (vmx_set_vmx_msr(vcpu, msr_index, data)) | 2539 | if (vmx_set_vmx_msr(vcpu, msr_info)) |
2498 | break; | 2540 | break; |
2499 | msr = find_msr_entry(vmx, msr_index); | 2541 | msr = find_msr_entry(vmx, msr_index); |
2500 | if (msr) { | 2542 | if (msr) { |
@@ -5302,9 +5344,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
5302 | 5344 | ||
5303 | /* It is a write fault? */ | 5345 | /* It is a write fault? */ |
5304 | error_code = exit_qualification & (1U << 1); | 5346 | error_code = exit_qualification & (1U << 1); |
5347 | /* It is a fetch fault? */ | ||
5348 | error_code |= (exit_qualification & (1U << 2)) << 2; | ||
5305 | /* ept page table is present? */ | 5349 | /* ept page table is present? */ |
5306 | error_code |= (exit_qualification >> 3) & 0x1; | 5350 | error_code |= (exit_qualification >> 3) & 0x1; |
5307 | 5351 | ||
5352 | vcpu->arch.exit_qualification = exit_qualification; | ||
5353 | |||
5308 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 5354 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
5309 | } | 5355 | } |
5310 | 5356 | ||
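handle_ept_violation() now also folds the instruction-fetch bit of the exit qualification into the page-fault error code handed to kvm_mmu_page_fault(), and saves the raw qualification for possible reinjection into L1. A standalone copy of the translation; the bit meanings follow the x86 #PF error-code layout (present = bit 0, write = bit 1, fetch = bit 4):

#include <stdint.h>
#include <stdio.h>

static uint32_t ept_violation_error_code(uint64_t exit_qualification)
{
        uint32_t error_code;

        /* Is it a write fault? (exit qualification bit 1 -> PFERR bit 1) */
        error_code = exit_qualification & (1U << 1);
        /* Is it a fetch fault? (exit qualification bit 2 -> PFERR bit 4) */
        error_code |= (exit_qualification & (1U << 2)) << 2;
        /* Was the EPT entry present (readable)? (bit 3 -> PFERR bit 0) */
        error_code |= (exit_qualification >> 3) & 0x1;
        return error_code;
}

int main(void)
{
        /* Instruction fetch from a present but execute-protected page. */
        printf("error code: %#x\n",
               ept_violation_error_code((1u << 2) | (1u << 3)));
        return 0;
}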
@@ -5438,7 +5484,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5438 | 5484 | ||
5439 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); | 5485 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); |
5440 | 5486 | ||
5441 | if (err == EMULATE_DO_MMIO) { | 5487 | if (err == EMULATE_USER_EXIT) { |
5488 | ++vcpu->stat.mmio_exits; | ||
5442 | ret = 0; | 5489 | ret = 0; |
5443 | goto out; | 5490 | goto out; |
5444 | } | 5491 | } |
@@ -5567,8 +5614,47 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | |||
5567 | free_loaded_vmcs(&vmx->vmcs01); | 5614 | free_loaded_vmcs(&vmx->vmcs01); |
5568 | } | 5615 | } |
5569 | 5616 | ||
5617 | /* | ||
5618 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5619 | * set the success or error code of an emulated VMX instruction, as specified | ||
5620 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5621 | */ | ||
5622 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5623 | { | ||
5624 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5625 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5626 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5627 | } | ||
5628 | |||
5629 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5630 | { | ||
5631 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5632 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5633 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5634 | | X86_EFLAGS_CF); | ||
5635 | } | ||
5636 | |||
5570 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | 5637 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, |
5571 | u32 vm_instruction_error); | 5638 | u32 vm_instruction_error) |
5639 | { | ||
5640 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5641 | /* | ||
5642 | * failValid writes the error number to the current VMCS, which | ||
5643 | * can't be done when there isn't a current VMCS. | ||
5644 | */ | ||
5645 | nested_vmx_failInvalid(vcpu); | ||
5646 | return; | ||
5647 | } | ||
5648 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5649 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5650 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5651 | | X86_EFLAGS_ZF); | ||
5652 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5653 | /* | ||
5654 | * We don't need to force a shadow sync because | ||
5655 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5656 | */ | ||
5657 | } | ||
5572 | 5658 | ||
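The three helpers are only moved earlier so handle_vmon() and handle_vmoff() can report status, but the RFLAGS convention they implement is worth spelling out: VMsucceed clears all six arithmetic flags, VMfailInvalid sets only CF, and VMfailValid sets only ZF and records the error number in the current VMCS. A minimal standalone model of the flag handling:

#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_CF 0x0001
#define X86_EFLAGS_PF 0x0004
#define X86_EFLAGS_AF 0x0010
#define X86_EFLAGS_ZF 0x0040
#define X86_EFLAGS_SF 0x0080
#define X86_EFLAGS_OF 0x0800

#define VMX_ARITH_FLAGS (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
                         X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)

static uint64_t vmx_succeed(uint64_t rflags)
{
        return rflags & ~VMX_ARITH_FLAGS;
}

static uint64_t vmx_fail_invalid(uint64_t rflags)
{
        return (rflags & ~VMX_ARITH_FLAGS) | X86_EFLAGS_CF;
}

static uint64_t vmx_fail_valid(uint64_t rflags)
{
        /* The real helper also stores the error number in the current VMCS. */
        return (rflags & ~VMX_ARITH_FLAGS) | X86_EFLAGS_ZF;
}

int main(void)
{
        uint64_t rflags = 0x2 | X86_EFLAGS_CF | X86_EFLAGS_ZF;  /* bit 1 is always set */

        printf("succeed:      %#llx\n", (unsigned long long)vmx_succeed(rflags));
        printf("fail invalid: %#llx\n", (unsigned long long)vmx_fail_invalid(rflags));
        printf("fail valid:   %#llx\n", (unsigned long long)vmx_fail_valid(rflags));
        return 0;
}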
5573 | /* | 5659 | /* |
5574 | * Emulate the VMXON instruction. | 5660 | * Emulate the VMXON instruction. |
@@ -5583,6 +5669,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5583 | struct kvm_segment cs; | 5669 | struct kvm_segment cs; |
5584 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5670 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5585 | struct vmcs *shadow_vmcs; | 5671 | struct vmcs *shadow_vmcs; |
5672 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | ||
5673 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||
5586 | 5674 | ||
5587 | /* The Intel VMX Instruction Reference lists a bunch of bits that | 5675 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
5588 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | 5676 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
@@ -5611,6 +5699,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5611 | skip_emulated_instruction(vcpu); | 5699 | skip_emulated_instruction(vcpu); |
5612 | return 1; | 5700 | return 1; |
5613 | } | 5701 | } |
5702 | |||
5703 | if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES) | ||
5704 | != VMXON_NEEDED_FEATURES) { | ||
5705 | kvm_inject_gp(vcpu, 0); | ||
5706 | return 1; | ||
5707 | } | ||
5708 | |||
5614 | if (enable_shadow_vmcs) { | 5709 | if (enable_shadow_vmcs) { |
5615 | shadow_vmcs = alloc_vmcs(); | 5710 | shadow_vmcs = alloc_vmcs(); |
5616 | if (!shadow_vmcs) | 5711 | if (!shadow_vmcs) |
@@ -5628,6 +5723,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5628 | vmx->nested.vmxon = true; | 5723 | vmx->nested.vmxon = true; |
5629 | 5724 | ||
5630 | skip_emulated_instruction(vcpu); | 5725 | skip_emulated_instruction(vcpu); |
5726 | nested_vmx_succeed(vcpu); | ||
5631 | return 1; | 5727 | return 1; |
5632 | } | 5728 | } |
5633 | 5729 | ||
@@ -5712,6 +5808,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) | |||
5712 | return 1; | 5808 | return 1; |
5713 | free_nested(to_vmx(vcpu)); | 5809 | free_nested(to_vmx(vcpu)); |
5714 | skip_emulated_instruction(vcpu); | 5810 | skip_emulated_instruction(vcpu); |
5811 | nested_vmx_succeed(vcpu); | ||
5715 | return 1; | 5812 | return 1; |
5716 | } | 5813 | } |
5717 | 5814 | ||
@@ -5768,48 +5865,6 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, | |||
5768 | return 0; | 5865 | return 0; |
5769 | } | 5866 | } |
5770 | 5867 | ||
5771 | /* | ||
5772 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5773 | * set the success or error code of an emulated VMX instruction, as specified | ||
5774 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5775 | */ | ||
5776 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5777 | { | ||
5778 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5779 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5780 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5781 | } | ||
5782 | |||
5783 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5784 | { | ||
5785 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5786 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5787 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5788 | | X86_EFLAGS_CF); | ||
5789 | } | ||
5790 | |||
5791 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
5792 | u32 vm_instruction_error) | ||
5793 | { | ||
5794 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5795 | /* | ||
5796 | * failValid writes the error number to the current VMCS, which | ||
5797 | * can't be done there isn't a current VMCS. | ||
5798 | */ | ||
5799 | nested_vmx_failInvalid(vcpu); | ||
5800 | return; | ||
5801 | } | ||
5802 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5803 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5804 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5805 | | X86_EFLAGS_ZF); | ||
5806 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5807 | /* | ||
5808 | * We don't need to force a shadow sync because | ||
5809 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5810 | */ | ||
5811 | } | ||
5812 | |||
5813 | /* Emulate the VMCLEAR instruction */ | 5868 | /* Emulate the VMCLEAR instruction */ |
5814 | static int handle_vmclear(struct kvm_vcpu *vcpu) | 5869 | static int handle_vmclear(struct kvm_vcpu *vcpu) |
5815 | { | 5870 | { |
@@ -5972,8 +6027,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
5972 | unsigned long field; | 6027 | unsigned long field; |
5973 | u64 field_value; | 6028 | u64 field_value; |
5974 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | 6029 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; |
5975 | unsigned long *fields = (unsigned long *)shadow_read_write_fields; | 6030 | const unsigned long *fields = shadow_read_write_fields; |
5976 | int num_fields = max_shadow_read_write_fields; | 6031 | const int num_fields = max_shadow_read_write_fields; |
5977 | 6032 | ||
5978 | vmcs_load(shadow_vmcs); | 6033 | vmcs_load(shadow_vmcs); |
5979 | 6034 | ||
@@ -6002,12 +6057,11 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
6002 | 6057 | ||
6003 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | 6058 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) |
6004 | { | 6059 | { |
6005 | unsigned long *fields[] = { | 6060 | const unsigned long *fields[] = { |
6006 | (unsigned long *)shadow_read_write_fields, | 6061 | shadow_read_write_fields, |
6007 | (unsigned long *)shadow_read_only_fields | 6062 | shadow_read_only_fields |
6008 | }; | 6063 | }; |
6009 | int num_lists = ARRAY_SIZE(fields); | 6064 | const int max_fields[] = { |
6010 | int max_fields[] = { | ||
6011 | max_shadow_read_write_fields, | 6065 | max_shadow_read_write_fields, |
6012 | max_shadow_read_only_fields | 6066 | max_shadow_read_only_fields |
6013 | }; | 6067 | }; |
@@ -6018,7 +6072,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | |||
6018 | 6072 | ||
6019 | vmcs_load(shadow_vmcs); | 6073 | vmcs_load(shadow_vmcs); |
6020 | 6074 | ||
6021 | for (q = 0; q < num_lists; q++) { | 6075 | for (q = 0; q < ARRAY_SIZE(fields); q++) { |
6022 | for (i = 0; i < max_fields[q]; i++) { | 6076 | for (i = 0; i < max_fields[q]; i++) { |
6023 | field = fields[q][i]; | 6077 | field = fields[q][i]; |
6024 | vmcs12_read_any(&vmx->vcpu, field, &field_value); | 6078 | vmcs12_read_any(&vmx->vcpu, field, &field_value); |
@@ -6248,6 +6302,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) | |||
6248 | return 1; | 6302 | return 1; |
6249 | } | 6303 | } |
6250 | 6304 | ||
6305 | /* Emulate the INVEPT instruction */ | ||
6306 | static int handle_invept(struct kvm_vcpu *vcpu) | ||
6307 | { | ||
6308 | u32 vmx_instruction_info, types; | ||
6309 | unsigned long type; | ||
6310 | gva_t gva; | ||
6311 | struct x86_exception e; | ||
6312 | struct { | ||
6313 | u64 eptp, gpa; | ||
6314 | } operand; | ||
6315 | u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK; | ||
6316 | |||
6317 | if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || | ||
6318 | !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { | ||
6319 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6320 | return 1; | ||
6321 | } | ||
6322 | |||
6323 | if (!nested_vmx_check_permission(vcpu)) | ||
6324 | return 1; | ||
6325 | |||
6326 | if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { | ||
6327 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6328 | return 1; | ||
6329 | } | ||
6330 | |||
6331 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
6332 | type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); | ||
6333 | |||
6334 | types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; | ||
6335 | |||
6336 | if (!(types & (1UL << type))) { | ||
6337 | nested_vmx_failValid(vcpu, | ||
6338 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
6339 | return 1; | ||
6340 | } | ||
6341 | |||
6342 | /* According to the Intel VMX instruction reference, the memory | ||
6343 | * operand is read even if it isn't needed (e.g., for type==global) | ||
6344 | */ | ||
6345 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
6346 | vmx_instruction_info, &gva)) | ||
6347 | return 1; | ||
6348 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, | ||
6349 | sizeof(operand), &e)) { | ||
6350 | kvm_inject_page_fault(vcpu, &e); | ||
6351 | return 1; | ||
6352 | } | ||
6353 | |||
6354 | switch (type) { | ||
6355 | case VMX_EPT_EXTENT_CONTEXT: | ||
6356 | if ((operand.eptp & eptp_mask) != | ||
6357 | (nested_ept_get_cr3(vcpu) & eptp_mask)) | ||
6358 | break; | ||
6359 | case VMX_EPT_EXTENT_GLOBAL: | ||
6360 | kvm_mmu_sync_roots(vcpu); | ||
6361 | kvm_mmu_flush_tlb(vcpu); | ||
6362 | nested_vmx_succeed(vcpu); | ||
6363 | break; | ||
6364 | default: | ||
6365 | BUG_ON(1); | ||
6366 | break; | ||
6367 | } | ||
6368 | |||
6369 | skip_emulated_instruction(vcpu); | ||
6370 | return 1; | ||
6371 | } | ||
6372 | |||
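handle_invept() validates the requested invalidation type against the extents advertised in nested_vmx_ept_caps before doing anything else. A standalone sketch of that check; the VMX_EPT_EXTENT_* values are assumptions matching the kernel's definitions (type 1 = single-context, type 2 = global, capability bits 25 and 26, shift 24):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_EXTENT_CONTEXT      1
#define VMX_EPT_EXTENT_GLOBAL       2
#define VMX_EPT_EXTENT_SHIFT        24
#define VMX_EPT_EXTENT_CONTEXT_BIT  (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT   (1ull << 26)

static bool invept_type_supported(uint64_t ept_caps, unsigned long type)
{
        /* Keep only the two extent bits, aligned so bit N means "type N ok". */
        uint32_t types = (ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

        return types & (1UL << type);
}

int main(void)
{
        uint64_t caps = VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT;

        printf("global type: %d\n",
               invept_type_supported(caps, VMX_EPT_EXTENT_GLOBAL));
        printf("type 0:      %d\n", invept_type_supported(caps, 0)); /* rejected */
        return 0;
}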
6251 | /* | 6373 | /* |
6252 | * The exit handlers return 1 if the exit was handled fully and guest execution | 6374 | * The exit handlers return 1 if the exit was handled fully and guest execution |
6253 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 6375 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -6292,6 +6414,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
6292 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | 6414 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, |
6293 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | 6415 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, |
6294 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | 6416 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, |
6417 | [EXIT_REASON_INVEPT] = handle_invept, | ||
6295 | }; | 6418 | }; |
6296 | 6419 | ||
6297 | static const int kvm_vmx_max_exit_handlers = | 6420 | static const int kvm_vmx_max_exit_handlers = |
@@ -6518,6 +6641,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6518 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: | 6641 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: |
6519 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: | 6642 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: |
6520 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: | 6643 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: |
6644 | case EXIT_REASON_INVEPT: | ||
6521 | /* | 6645 | /* |
6522 | * VMX instructions trap unconditionally. This allows L1 to | 6646 | * VMX instructions trap unconditionally. This allows L1 to |
6523 | * emulate them for its L2 guest, i.e., allows 3-level nesting! | 6647 | * emulate them for its L2 guest, i.e., allows 3-level nesting! |
@@ -6550,7 +6674,20 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6550 | return nested_cpu_has2(vmcs12, | 6674 | return nested_cpu_has2(vmcs12, |
6551 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 6675 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
6552 | case EXIT_REASON_EPT_VIOLATION: | 6676 | case EXIT_REASON_EPT_VIOLATION: |
6677 | /* | ||
6678 | * L0 always deals with the EPT violation. If nested EPT is | ||
6679 | * used, and the nested mmu code discovers that the address is | ||
6680 | * missing in the guest EPT table (EPT12), the EPT violation | ||
6681 | * will be injected with nested_ept_inject_page_fault() | ||
6682 | */ | ||
6683 | return 0; | ||
6553 | case EXIT_REASON_EPT_MISCONFIG: | 6684 | case EXIT_REASON_EPT_MISCONFIG: |
6685 | /* | ||
6686 | * L2 never uses L1's EPT directly, but rather L0's own EPT | ||
6687 | * table (shadow on EPT) or a merged EPT table that L0 built | ||
6688 | * (EPT on EPT). So any problems with the structure of the | ||
6689 | * table are L0's fault. | ||
6690 | */ | ||
6554 | return 0; | 6691 | return 0; |
6555 | case EXIT_REASON_PREEMPTION_TIMER: | 6692 | case EXIT_REASON_PREEMPTION_TIMER: |
6556 | return vmcs12->pin_based_vm_exec_control & | 6693 | return vmcs12->pin_based_vm_exec_control & |
@@ -6638,7 +6775,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6638 | 6775 | ||
6639 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && | 6776 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && |
6640 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( | 6777 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( |
6641 | get_vmcs12(vcpu), vcpu)))) { | 6778 | get_vmcs12(vcpu))))) { |
6642 | if (vmx_interrupt_allowed(vcpu)) { | 6779 | if (vmx_interrupt_allowed(vcpu)) { |
6643 | vmx->soft_vnmi_blocked = 0; | 6780 | vmx->soft_vnmi_blocked = 0; |
6644 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | 6781 | } else if (vmx->vnmi_blocked_time > 1000000000LL && |
@@ -7326,6 +7463,48 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
7326 | entry->ecx |= bit(X86_FEATURE_VMX); | 7463 | entry->ecx |= bit(X86_FEATURE_VMX); |
7327 | } | 7464 | } |
7328 | 7465 | ||
7466 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | ||
7467 | struct x86_exception *fault) | ||
7468 | { | ||
7469 | struct vmcs12 *vmcs12; | ||
7470 | nested_vmx_vmexit(vcpu); | ||
7471 | vmcs12 = get_vmcs12(vcpu); | ||
7472 | |||
7473 | if (fault->error_code & PFERR_RSVD_MASK) | ||
7474 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; | ||
7475 | else | ||
7476 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; | ||
7477 | vmcs12->exit_qualification = vcpu->arch.exit_qualification; | ||
7478 | vmcs12->guest_physical_address = fault->address; | ||
7479 | } | ||
7480 | |||
7481 | /* Callbacks for nested_ept_init_mmu_context: */ | ||
7482 | |||
7483 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | ||
7484 | { | ||
7485 | /* return the page table to be shadowed - in our case, EPT12 */ | ||
7486 | return get_vmcs12(vcpu)->ept_pointer; | ||
7487 | } | ||
7488 | |||
7489 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | ||
7490 | { | ||
7491 | int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, | ||
7492 | nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); | ||
7493 | |||
7494 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | ||
7495 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | ||
7496 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | ||
7497 | |||
7498 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | ||
7499 | |||
7500 | return r; | ||
7501 | } | ||
7502 | |||
7503 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | ||
7504 | { | ||
7505 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
7506 | } | ||
7507 | |||
7329 | /* | 7508 | /* |
7330 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 7509 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
7331 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 7510 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
@@ -7388,7 +7567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7388 | vmcs12->guest_interruptibility_info); | 7567 | vmcs12->guest_interruptibility_info); |
7389 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7568 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
7390 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | 7569 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
7391 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | 7570 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
7392 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7571 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
7393 | vmcs12->guest_pending_dbg_exceptions); | 7572 | vmcs12->guest_pending_dbg_exceptions); |
7394 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); | 7573 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); |
@@ -7508,15 +7687,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7508 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; | 7687 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; |
7509 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | 7688 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); |
7510 | 7689 | ||
7511 | /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */ | 7690 | /* L2->L1 exit controls are emulated - the hardware exit is to L0 so |
7512 | vmcs_write32(VM_EXIT_CONTROLS, | 7691 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER |
7513 | vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); | 7692 | * bits are further modified by vmx_set_efer() below. |
7514 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | | 7693 | */ |
7694 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | ||
7695 | |||
7696 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | ||
7697 | * emulated by vmx_set_efer(), below. | ||
7698 | */ | ||
7699 | vmcs_write32(VM_ENTRY_CONTROLS, | ||
7700 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & | ||
7701 | ~VM_ENTRY_IA32E_MODE) | | ||
7515 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 7702 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
7516 | 7703 | ||
7517 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) | 7704 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { |
7518 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); | 7705 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); |
7519 | else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | 7706 | vcpu->arch.pat = vmcs12->guest_ia32_pat; |
7707 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | ||
7520 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | 7708 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); |
7521 | 7709 | ||
7522 | 7710 | ||
@@ -7538,6 +7726,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7538 | vmx_flush_tlb(vcpu); | 7726 | vmx_flush_tlb(vcpu); |
7539 | } | 7727 | } |
7540 | 7728 | ||
7729 | if (nested_cpu_has_ept(vmcs12)) { | ||
7730 | kvm_mmu_unload(vcpu); | ||
7731 | nested_ept_init_mmu_context(vcpu); | ||
7732 | } | ||
7733 | |||
7541 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 7734 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) |
7542 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 7735 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
7543 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 7736 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
@@ -7565,6 +7758,16 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7565 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); | 7758 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); |
7566 | kvm_mmu_reset_context(vcpu); | 7759 | kvm_mmu_reset_context(vcpu); |
7567 | 7760 | ||
7761 | /* | ||
7762 | * L1 may access L2's PDPTRs, so save them to construct vmcs12 | ||
7763 | */ | ||
7764 | if (enable_ept) { | ||
7765 | vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); | ||
7766 | vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); | ||
7767 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); | ||
7768 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); | ||
7769 | } | ||
7770 | |||
7568 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); | 7771 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); |
7569 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); | 7772 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); |
7570 | } | 7773 | } |
@@ -7887,6 +8090,22 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7887 | vmcs12->guest_pending_dbg_exceptions = | 8090 | vmcs12->guest_pending_dbg_exceptions = |
7888 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8091 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
7889 | 8092 | ||
8093 | /* | ||
8094 | * In some cases (usually, nested EPT), L2 is allowed to change its | ||
8095 | * own CR3 without exiting. If it has changed it, we must keep it. | ||
8096 | * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined | ||
8097 | * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. | ||
8098 | * | ||
8099 | * Additionally, restore L2's PDPTR to vmcs12. | ||
8100 | */ | ||
8101 | if (enable_ept) { | ||
8102 | vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); | ||
8103 | vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); | ||
8104 | vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); | ||
8105 | vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); | ||
8106 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | ||
8107 | } | ||
8108 | |||
7890 | vmcs12->vm_entry_controls = | 8109 | vmcs12->vm_entry_controls = |
7891 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8110 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
7892 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | 8111 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); |
@@ -7948,6 +8167,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7948 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | 8167 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
7949 | struct vmcs12 *vmcs12) | 8168 | struct vmcs12 *vmcs12) |
7950 | { | 8169 | { |
8170 | struct kvm_segment seg; | ||
8171 | |||
7951 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 8172 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
7952 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 8173 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
7953 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | 8174 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) |
@@ -7982,7 +8203,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
7982 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | 8203 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); |
7983 | kvm_set_cr4(vcpu, vmcs12->host_cr4); | 8204 | kvm_set_cr4(vcpu, vmcs12->host_cr4); |
7984 | 8205 | ||
7985 | /* shadow page tables on either EPT or shadow page tables */ | 8206 | if (nested_cpu_has_ept(vmcs12)) |
8207 | nested_ept_uninit_mmu_context(vcpu); | ||
8208 | |||
7986 | kvm_set_cr3(vcpu, vmcs12->host_cr3); | 8209 | kvm_set_cr3(vcpu, vmcs12->host_cr3); |
7987 | kvm_mmu_reset_context(vcpu); | 8210 | kvm_mmu_reset_context(vcpu); |
7988 | 8211 | ||
@@ -8001,23 +8224,61 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8001 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); | 8224 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); |
8002 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); | 8225 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); |
8003 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); | 8226 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); |
8004 | vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base); | 8227 | |
8005 | vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base); | 8228 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { |
8006 | vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base); | ||
8007 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector); | ||
8008 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector); | ||
8009 | vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector); | ||
8010 | vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector); | ||
8011 | vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector); | ||
8012 | vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector); | ||
8013 | vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector); | ||
8014 | |||
8015 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) | ||
8016 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); | 8229 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); |
8230 | vcpu->arch.pat = vmcs12->host_ia32_pat; | ||
8231 | } | ||
8017 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 8232 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) |
8018 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | 8233 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, |
8019 | vmcs12->host_ia32_perf_global_ctrl); | 8234 | vmcs12->host_ia32_perf_global_ctrl); |
8020 | 8235 | ||
8236 | /* Set L1 segment info according to Intel SDM | ||
8237 | 27.5.2 Loading Host Segment and Descriptor-Table Registers */ | ||
8238 | seg = (struct kvm_segment) { | ||
8239 | .base = 0, | ||
8240 | .limit = 0xFFFFFFFF, | ||
8241 | .selector = vmcs12->host_cs_selector, | ||
8242 | .type = 11, | ||
8243 | .present = 1, | ||
8244 | .s = 1, | ||
8245 | .g = 1 | ||
8246 | }; | ||
8247 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | ||
8248 | seg.l = 1; | ||
8249 | else | ||
8250 | seg.db = 1; | ||
8251 | vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); | ||
8252 | seg = (struct kvm_segment) { | ||
8253 | .base = 0, | ||
8254 | .limit = 0xFFFFFFFF, | ||
8255 | .type = 3, | ||
8256 | .present = 1, | ||
8257 | .s = 1, | ||
8258 | .db = 1, | ||
8259 | .g = 1 | ||
8260 | }; | ||
8261 | seg.selector = vmcs12->host_ds_selector; | ||
8262 | vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); | ||
8263 | seg.selector = vmcs12->host_es_selector; | ||
8264 | vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); | ||
8265 | seg.selector = vmcs12->host_ss_selector; | ||
8266 | vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); | ||
8267 | seg.selector = vmcs12->host_fs_selector; | ||
8268 | seg.base = vmcs12->host_fs_base; | ||
8269 | vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); | ||
8270 | seg.selector = vmcs12->host_gs_selector; | ||
8271 | seg.base = vmcs12->host_gs_base; | ||
8272 | vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); | ||
8273 | seg = (struct kvm_segment) { | ||
8274 | .base = vmcs12->host_tr_base, | ||
8275 | .limit = 0x67, | ||
8276 | .selector = vmcs12->host_tr_selector, | ||
8277 | .type = 11, | ||
8278 | .present = 1 | ||
8279 | }; | ||
8280 | vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); | ||
8281 | |||
8021 | kvm_set_dr(vcpu, 7, 0x400); | 8282 | kvm_set_dr(vcpu, 7, 0x400); |
8022 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | 8283 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
8023 | } | 8284 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d21bce505315..e5ca72a5cdb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -682,17 +682,6 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
682 | */ | 682 | */ |
683 | } | 683 | } |
684 | 684 | ||
685 | /* | ||
686 | * Does the new cr3 value map to physical memory? (Note, we | ||
687 | * catch an invalid cr3 even in real-mode, because it would | ||
688 | * cause trouble later on when we turn on paging anyway.) | ||
689 | * | ||
690 | * A real CPU would silently accept an invalid cr3 and would | ||
691 | * attempt to use it - with largely undefined (and often hard | ||
692 | * to debug) behavior on the guest side. | ||
693 | */ | ||
694 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | ||
695 | return 1; | ||
696 | vcpu->arch.cr3 = cr3; | 685 | vcpu->arch.cr3 = cr3; |
697 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | 686 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); |
698 | vcpu->arch.mmu.new_cr3(vcpu); | 687 | vcpu->arch.mmu.new_cr3(vcpu); |
@@ -850,7 +839,8 @@ static u32 msrs_to_save[] = { | |||
850 | #ifdef CONFIG_X86_64 | 839 | #ifdef CONFIG_X86_64 |
851 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 840 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
852 | #endif | 841 | #endif |
853 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | 842 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
843 | MSR_IA32_FEATURE_CONTROL | ||
854 | }; | 844 | }; |
855 | 845 | ||
856 | static unsigned num_msrs_to_save; | 846 | static unsigned num_msrs_to_save; |
@@ -1457,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) | |||
1457 | #endif | 1447 | #endif |
1458 | } | 1448 | } |
1459 | 1449 | ||
1450 | static void kvm_gen_update_masterclock(struct kvm *kvm) | ||
1451 | { | ||
1452 | #ifdef CONFIG_X86_64 | ||
1453 | int i; | ||
1454 | struct kvm_vcpu *vcpu; | ||
1455 | struct kvm_arch *ka = &kvm->arch; | ||
1456 | |||
1457 | spin_lock(&ka->pvclock_gtod_sync_lock); | ||
1458 | kvm_make_mclock_inprogress_request(kvm); | ||
1459 | /* no guest entries from this point */ | ||
1460 | pvclock_update_vm_gtod_copy(kvm); | ||
1461 | |||
1462 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1463 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
1464 | |||
1465 | /* guest entries allowed */ | ||
1466 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1467 | clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); | ||
1468 | |||
1469 | spin_unlock(&ka->pvclock_gtod_sync_lock); | ||
1470 | #endif | ||
1471 | } | ||
1472 | |||
1460 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1473 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
1461 | { | 1474 | { |
1462 | unsigned long flags, this_tsc_khz; | 1475 | unsigned long flags, this_tsc_khz; |
@@ -3806,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3806 | delta = user_ns.clock - now_ns; | 3819 | delta = user_ns.clock - now_ns; |
3807 | local_irq_enable(); | 3820 | local_irq_enable(); |
3808 | kvm->arch.kvmclock_offset = delta; | 3821 | kvm->arch.kvmclock_offset = delta; |
3822 | kvm_gen_update_masterclock(kvm); | ||
3809 | break; | 3823 | break; |
3810 | } | 3824 | } |
3811 | case KVM_GET_CLOCK: { | 3825 | case KVM_GET_CLOCK: { |
@@ -4955,6 +4969,97 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
4955 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); | 4969 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); |
4956 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); | 4970 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); |
4957 | 4971 | ||
4972 | static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, | ||
4973 | unsigned long *db) | ||
4974 | { | ||
4975 | u32 dr6 = 0; | ||
4976 | int i; | ||
4977 | u32 enable, rwlen; | ||
4978 | |||
4979 | enable = dr7; | ||
4980 | rwlen = dr7 >> 16; | ||
4981 | for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4) | ||
4982 | if ((enable & 3) && (rwlen & 15) == type && db[i] == addr) | ||
4983 | dr6 |= (1 << i); | ||
4984 | return dr6; | ||
4985 | } | ||
4986 | |||
4987 | static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) | ||
4988 | { | ||
4989 | struct kvm_run *kvm_run = vcpu->run; | ||
4990 | |||
4991 | /* | ||
4992 | * Use the "raw" value to see if TF was passed to the processor. | ||
4993 | * Note that the new value of the flags has not been saved yet. | ||
4994 | * | ||
4995 | * This is correct even for TF set by the guest, because "the | ||
4996 | * processor will not generate this exception after the instruction | ||
4997 | * that sets the TF flag". | ||
4998 | */ | ||
4999 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5000 | |||
5001 | if (unlikely(rflags & X86_EFLAGS_TF)) { | ||
5002 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | ||
5003 | kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; | ||
5004 | kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; | ||
5005 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
5006 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
5007 | *r = EMULATE_USER_EXIT; | ||
5008 | } else { | ||
5009 | vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF; | ||
5010 | /* | ||
5011 | * "Certain debug exceptions may clear bit 0-3. The | ||
5012 | * remaining contents of the DR6 register are never | ||
5013 | * cleared by the processor". | ||
5014 | */ | ||
5015 | vcpu->arch.dr6 &= ~15; | ||
5016 | vcpu->arch.dr6 |= DR6_BS; | ||
5017 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5018 | } | ||
5019 | } | ||
5020 | } | ||
5021 | |||
5022 | static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | ||
5023 | { | ||
5024 | struct kvm_run *kvm_run = vcpu->run; | ||
5025 | unsigned long eip = vcpu->arch.emulate_ctxt.eip; | ||
5026 | u32 dr6 = 0; | ||
5027 | |||
5028 | if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && | ||
5029 | (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { | ||
5030 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | ||
5031 | vcpu->arch.guest_debug_dr7, | ||
5032 | vcpu->arch.eff_db); | ||
5033 | |||
5034 | if (dr6 != 0) { | ||
5035 | kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; | ||
5036 | kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + | ||
5037 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5038 | |||
5039 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
5040 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
5041 | *r = EMULATE_USER_EXIT; | ||
5042 | return true; | ||
5043 | } | ||
5044 | } | ||
5045 | |||
5046 | if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { | ||
5047 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | ||
5048 | vcpu->arch.dr7, | ||
5049 | vcpu->arch.db); | ||
5050 | |||
5051 | if (dr6 != 0) { | ||
5052 | vcpu->arch.dr6 &= ~15; | ||
5053 | vcpu->arch.dr6 |= dr6; | ||
5054 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5055 | *r = EMULATE_DONE; | ||
5056 | return true; | ||
5057 | } | ||
5058 | } | ||
5059 | |||
5060 | return false; | ||
5061 | } | ||
5062 | |||
4958 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 5063 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4959 | unsigned long cr2, | 5064 | unsigned long cr2, |
4960 | int emulation_type, | 5065 | int emulation_type, |
@@ -4975,6 +5080,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4975 | 5080 | ||
4976 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 5081 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4977 | init_emulate_ctxt(vcpu); | 5082 | init_emulate_ctxt(vcpu); |
5083 | |||
5084 | /* | ||
5085 | * We will reenter on the same instruction since | ||
5086 | * we do not set complete_userspace_io. This does not | ||
5087 | * handle watchpoints yet; those would be handled in | ||
5088 | * the emulate_ops. | ||
5089 | */ | ||
5090 | if (kvm_vcpu_check_breakpoint(vcpu, &r)) | ||
5091 | return r; | ||
5092 | |||
4978 | ctxt->interruptibility = 0; | 5093 | ctxt->interruptibility = 0; |
4979 | ctxt->have_exception = false; | 5094 | ctxt->have_exception = false; |
4980 | ctxt->perm_ok = false; | 5095 | ctxt->perm_ok = false; |
@@ -5031,17 +5146,18 @@ restart: | |||
5031 | inject_emulated_exception(vcpu); | 5146 | inject_emulated_exception(vcpu); |
5032 | r = EMULATE_DONE; | 5147 | r = EMULATE_DONE; |
5033 | } else if (vcpu->arch.pio.count) { | 5148 | } else if (vcpu->arch.pio.count) { |
5034 | if (!vcpu->arch.pio.in) | 5149 | if (!vcpu->arch.pio.in) { |
5150 | /* FIXME: return into emulator if single-stepping. */ | ||
5035 | vcpu->arch.pio.count = 0; | 5151 | vcpu->arch.pio.count = 0; |
5036 | else { | 5152 | } else { |
5037 | writeback = false; | 5153 | writeback = false; |
5038 | vcpu->arch.complete_userspace_io = complete_emulated_pio; | 5154 | vcpu->arch.complete_userspace_io = complete_emulated_pio; |
5039 | } | 5155 | } |
5040 | r = EMULATE_DO_MMIO; | 5156 | r = EMULATE_USER_EXIT; |
5041 | } else if (vcpu->mmio_needed) { | 5157 | } else if (vcpu->mmio_needed) { |
5042 | if (!vcpu->mmio_is_write) | 5158 | if (!vcpu->mmio_is_write) |
5043 | writeback = false; | 5159 | writeback = false; |
5044 | r = EMULATE_DO_MMIO; | 5160 | r = EMULATE_USER_EXIT; |
5045 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; | 5161 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; |
5046 | } else if (r == EMULATION_RESTART) | 5162 | } else if (r == EMULATION_RESTART) |
5047 | goto restart; | 5163 | goto restart; |
@@ -5050,10 +5166,12 @@ restart: | |||
5050 | 5166 | ||
5051 | if (writeback) { | 5167 | if (writeback) { |
5052 | toggle_interruptibility(vcpu, ctxt->interruptibility); | 5168 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
5053 | kvm_set_rflags(vcpu, ctxt->eflags); | ||
5054 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5169 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5055 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5170 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5056 | kvm_rip_write(vcpu, ctxt->eip); | 5171 | kvm_rip_write(vcpu, ctxt->eip); |
5172 | if (r == EMULATE_DONE) | ||
5173 | kvm_vcpu_check_singlestep(vcpu, &r); | ||
5174 | kvm_set_rflags(vcpu, ctxt->eflags); | ||
5057 | } else | 5175 | } else |
5058 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | 5176 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; |
5059 | 5177 | ||
@@ -5347,7 +5465,7 @@ static struct notifier_block pvclock_gtod_notifier = { | |||
5347 | int kvm_arch_init(void *opaque) | 5465 | int kvm_arch_init(void *opaque) |
5348 | { | 5466 | { |
5349 | int r; | 5467 | int r; |
5350 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 5468 | struct kvm_x86_ops *ops = opaque; |
5351 | 5469 | ||
5352 | if (kvm_x86_ops) { | 5470 | if (kvm_x86_ops) { |
5353 | printk(KERN_ERR "kvm: already loaded the other module\n"); | 5471 | printk(KERN_ERR "kvm: already loaded the other module\n"); |
@@ -5495,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
5495 | return 1; | 5613 | return 1; |
5496 | } | 5614 | } |
5497 | 5615 | ||
5616 | /* | ||
5617 | * kvm_pv_kick_cpu_op: Kick a vcpu. | ||
5618 | * | ||
5619 | * @apicid - apicid of vcpu to be kicked. | ||
5620 | */ | ||
5621 | static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | ||
5622 | { | ||
5623 | struct kvm_lapic_irq lapic_irq; | ||
5624 | |||
5625 | lapic_irq.shorthand = 0; | ||
5626 | lapic_irq.dest_mode = 0; | ||
5627 | lapic_irq.dest_id = apicid; | ||
5628 | |||
5629 | lapic_irq.delivery_mode = APIC_DM_REMRD; | ||
5630 | kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); | ||
5631 | } | ||
5632 | |||
5498 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 5633 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
5499 | { | 5634 | { |
5500 | unsigned long nr, a0, a1, a2, a3, ret; | 5635 | unsigned long nr, a0, a1, a2, a3, ret; |
@@ -5528,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5528 | case KVM_HC_VAPIC_POLL_IRQ: | 5663 | case KVM_HC_VAPIC_POLL_IRQ: |
5529 | ret = 0; | 5664 | ret = 0; |
5530 | break; | 5665 | break; |
5666 | case KVM_HC_KICK_CPU: | ||
5667 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); | ||
5668 | ret = 0; | ||
5669 | break; | ||
5531 | default: | 5670 | default: |
5532 | ret = -KVM_ENOSYS; | 5671 | ret = -KVM_ENOSYS; |
5533 | break; | 5672 | break; |
@@ -5689,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) | |||
5689 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5828 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5690 | } | 5829 | } |
5691 | 5830 | ||
5692 | static void kvm_gen_update_masterclock(struct kvm *kvm) | ||
5693 | { | ||
5694 | #ifdef CONFIG_X86_64 | ||
5695 | int i; | ||
5696 | struct kvm_vcpu *vcpu; | ||
5697 | struct kvm_arch *ka = &kvm->arch; | ||
5698 | |||
5699 | spin_lock(&ka->pvclock_gtod_sync_lock); | ||
5700 | kvm_make_mclock_inprogress_request(kvm); | ||
5701 | /* no guest entries from this point */ | ||
5702 | pvclock_update_vm_gtod_copy(kvm); | ||
5703 | |||
5704 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
5705 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
5706 | |||
5707 | /* guest entries allowed */ | ||
5708 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
5709 | clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); | ||
5710 | |||
5711 | spin_unlock(&ka->pvclock_gtod_sync_lock); | ||
5712 | #endif | ||
5713 | } | ||
5714 | |||
5715 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 5831 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
5716 | { | 5832 | { |
5717 | u64 eoi_exit_bitmap[4]; | 5833 | u64 eoi_exit_bitmap[4]; |
@@ -5950,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5950 | kvm_apic_accept_events(vcpu); | 6066 | kvm_apic_accept_events(vcpu); |
5951 | switch(vcpu->arch.mp_state) { | 6067 | switch(vcpu->arch.mp_state) { |
5952 | case KVM_MP_STATE_HALTED: | 6068 | case KVM_MP_STATE_HALTED: |
6069 | vcpu->arch.pv.pv_unhalted = false; | ||
5953 | vcpu->arch.mp_state = | 6070 | vcpu->arch.mp_state = |
5954 | KVM_MP_STATE_RUNNABLE; | 6071 | KVM_MP_STATE_RUNNABLE; |
5955 | case KVM_MP_STATE_RUNNABLE: | 6072 | case KVM_MP_STATE_RUNNABLE: |
@@ -6061,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) | |||
6061 | 6178 | ||
6062 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { | 6179 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { |
6063 | vcpu->mmio_needed = 0; | 6180 | vcpu->mmio_needed = 0; |
6181 | |||
6182 | /* FIXME: return into emulator if single-stepping. */ | ||
6064 | if (vcpu->mmio_is_write) | 6183 | if (vcpu->mmio_is_write) |
6065 | return 1; | 6184 | return 1; |
6066 | vcpu->mmio_read_completed = 1; | 6185 | vcpu->mmio_read_completed = 1; |
@@ -6249,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
6249 | struct kvm_mp_state *mp_state) | 6368 | struct kvm_mp_state *mp_state) |
6250 | { | 6369 | { |
6251 | kvm_apic_accept_events(vcpu); | 6370 | kvm_apic_accept_events(vcpu); |
6252 | mp_state->mp_state = vcpu->arch.mp_state; | 6371 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && |
6372 | vcpu->arch.pv.pv_unhalted) | ||
6373 | mp_state->mp_state = KVM_MP_STATE_RUNNABLE; | ||
6374 | else | ||
6375 | mp_state->mp_state = vcpu->arch.mp_state; | ||
6376 | |||
6253 | return 0; | 6377 | return 0; |
6254 | } | 6378 | } |
6255 | 6379 | ||
@@ -6770,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6770 | BUG_ON(vcpu->kvm == NULL); | 6894 | BUG_ON(vcpu->kvm == NULL); |
6771 | kvm = vcpu->kvm; | 6895 | kvm = vcpu->kvm; |
6772 | 6896 | ||
6897 | vcpu->arch.pv.pv_unhalted = false; | ||
6773 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 6898 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
6774 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 6899 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
6775 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 6900 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -7019,6 +7144,15 @@ out_free: | |||
7019 | return -ENOMEM; | 7144 | return -ENOMEM; |
7020 | } | 7145 | } |
7021 | 7146 | ||
7147 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
7148 | { | ||
7149 | /* | ||
7150 | * memslots->generation has been incremented. | ||
7151 | * mmio generation may have reached its maximum value. | ||
7152 | */ | ||
7153 | kvm_mmu_invalidate_mmio_sptes(kvm); | ||
7154 | } | ||
7155 | |||
7022 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 7156 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
7023 | struct kvm_memory_slot *memslot, | 7157 | struct kvm_memory_slot *memslot, |
7024 | struct kvm_userspace_memory_region *mem, | 7158 | struct kvm_userspace_memory_region *mem, |
@@ -7079,11 +7213,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7079 | */ | 7213 | */ |
7080 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7214 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) |
7081 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7215 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
7082 | /* | ||
7083 | * If memory slot is created, or moved, we need to clear all | ||
7084 | * mmio sptes. | ||
7085 | */ | ||
7086 | kvm_mmu_invalidate_mmio_sptes(kvm); | ||
7087 | } | 7216 | } |
7088 | 7217 | ||
7089 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 7218 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
@@ -7103,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
7103 | !vcpu->arch.apf.halted) | 7232 | !vcpu->arch.apf.halted) |
7104 | || !list_empty_careful(&vcpu->async_pf.done) | 7233 | || !list_empty_careful(&vcpu->async_pf.done) |
7105 | || kvm_apic_has_events(vcpu) | 7234 | || kvm_apic_has_events(vcpu) |
7235 | || vcpu->arch.pv.pv_unhalted | ||
7106 | || atomic_read(&vcpu->arch.nmi_queued) || | 7236 | || atomic_read(&vcpu->arch.nmi_queued) || |
7107 | (kvm_arch_interrupt_allowed(vcpu) && | 7237 | (kvm_arch_interrupt_allowed(vcpu) && |
7108 | kvm_cpu_has_interrupt(vcpu)); | 7238 | kvm_cpu_has_interrupt(vcpu)); |
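Editor's note: the x86.c hunks above add the host side of the paravirtual ticket-spinlock kick. A vCPU halted while waiting for its ticket is marked pv_unhalted and treated as runnable once another vCPU issues the KVM_HC_KICK_CPU hypercall handled by kvm_pv_kick_cpu_op(). As a rough guest-side illustration only (the actual guest code lives elsewhere in this series, not in this diff), the lock releaser could kick the next waiter along these lines, assuming the generic kvm_hypercall2() helper from asm/kvm_para.h with a0 carrying (currently unused) flags and a1 the target APIC ID; example_kick_vcpu() is a hypothetical name:

#include <linux/kvm_para.h>	/* kvm_hypercall2(), KVM_HC_KICK_CPU */

/* Hypothetical helper: wake the vCPU identified by @apicid out of HLT. */
static void example_kick_vcpu(int apicid)
{
	/* a0 = flags (none defined yet, pass 0), a1 = APIC ID to kick */
	kvm_hypercall2(KVM_HC_KICK_CPU, 0, apicid);
}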
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 6a22c19da663..bdf8532494fe 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -7,8 +7,7 @@ | |||
7 | * kernel and insert a module (lg.ko) which allows us to run other Linux | 7 | * kernel and insert a module (lg.ko) which allows us to run other Linux |
8 | * kernels the same way we'd run processes. We call the first kernel the Host, | 8 | * kernels the same way we'd run processes. We call the first kernel the Host, |
9 | * and the others the Guests. The program which sets up and configures Guests | 9 | * and the others the Guests. The program which sets up and configures Guests |
10 | * (such as the example in Documentation/virtual/lguest/lguest.c) is called the | 10 | * (such as the example in tools/lguest/lguest.c) is called the Launcher. |
11 | * Launcher. | ||
12 | * | 11 | * |
13 | * Secondly, we only run specially modified Guests, not normal kernels: setting | 12 | * Secondly, we only run specially modified Guests, not normal kernels: setting |
14 | * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows | 13 | * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows |
@@ -1057,6 +1056,12 @@ static void lguest_load_sp0(struct tss_struct *tss, | |||
1057 | } | 1056 | } |
1058 | 1057 | ||
1059 | /* Let's just say, I wouldn't do debugging under a Guest. */ | 1058 | /* Let's just say, I wouldn't do debugging under a Guest. */ |
1059 | static unsigned long lguest_get_debugreg(int regno) | ||
1060 | { | ||
1061 | /* FIXME: Implement */ | ||
1062 | return 0; | ||
1063 | } | ||
1064 | |||
1060 | static void lguest_set_debugreg(int regno, unsigned long value) | 1065 | static void lguest_set_debugreg(int regno, unsigned long value) |
1061 | { | 1066 | { |
1062 | /* FIXME: Implement */ | 1067 | /* FIXME: Implement */ |
@@ -1304,6 +1309,7 @@ __init void lguest_init(void) | |||
1304 | pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; | 1309 | pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; |
1305 | pv_cpu_ops.set_ldt = lguest_set_ldt; | 1310 | pv_cpu_ops.set_ldt = lguest_set_ldt; |
1306 | pv_cpu_ops.load_tls = lguest_load_tls; | 1311 | pv_cpu_ops.load_tls = lguest_load_tls; |
1312 | pv_cpu_ops.get_debugreg = lguest_get_debugreg; | ||
1307 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; | 1313 | pv_cpu_ops.set_debugreg = lguest_set_debugreg; |
1308 | pv_cpu_ops.clts = lguest_clts; | 1314 | pv_cpu_ops.clts = lguest_clts; |
1309 | pv_cpu_ops.read_cr0 = lguest_read_cr0; | 1315 | pv_cpu_ops.read_cr0 = lguest_read_cr0; |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 7e73e8c69096..9d980d88b747 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -59,6 +59,10 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
59 | return NULL; | 59 | return NULL; |
60 | } | 60 | } |
61 | 61 | ||
62 | int pmd_huge_support(void) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
62 | #else | 66 | #else |
63 | 67 | ||
64 | struct page * | 68 | struct page * |
@@ -77,6 +81,10 @@ int pud_huge(pud_t pud) | |||
77 | return !!(pud_val(pud) & _PAGE_PSE); | 81 | return !!(pud_val(pud) & _PAGE_PSE); |
78 | } | 82 | } |
79 | 83 | ||
84 | int pmd_huge_support(void) | ||
85 | { | ||
86 | return 1; | ||
87 | } | ||
80 | #endif | 88 | #endif |
81 | 89 | ||
82 | /* x86_64 also uses this file */ | 90 | /* x86_64 also uses this file */ |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 282375f13c7e..ae699b3bbac8 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -103,6 +103,7 @@ static void flush_tlb_func(void *info) | |||
103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) | 103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) |
104 | return; | 104 | return; |
105 | 105 | ||
106 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | ||
106 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { | 107 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { |
107 | if (f->flush_end == TLB_FLUSH_ALL) | 108 | if (f->flush_end == TLB_FLUSH_ALL) |
108 | local_flush_tlb(); | 109 | local_flush_tlb(); |
@@ -130,6 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
130 | info.flush_start = start; | 131 | info.flush_start = start; |
131 | info.flush_end = end; | 132 | info.flush_end = end; |
132 | 133 | ||
134 | count_vm_event(NR_TLB_REMOTE_FLUSH); | ||
133 | if (is_uv_system()) { | 135 | if (is_uv_system()) { |
134 | unsigned int cpu; | 136 | unsigned int cpu; |
135 | 137 | ||
@@ -149,6 +151,7 @@ void flush_tlb_current_task(void) | |||
149 | 151 | ||
150 | preempt_disable(); | 152 | preempt_disable(); |
151 | 153 | ||
154 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
152 | local_flush_tlb(); | 155 | local_flush_tlb(); |
153 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 156 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
154 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | 157 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
@@ -211,16 +214,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
211 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; | 214 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; |
212 | 215 | ||
213 | /* tlb_flushall_shift is on balance point, details in commit log */ | 216 | /* tlb_flushall_shift is on balance point, details in commit log */ |
214 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | 217 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { |
218 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | ||
215 | local_flush_tlb(); | 219 | local_flush_tlb(); |
216 | else { | 220 | } else { |
217 | if (has_large_page(mm, start, end)) { | 221 | if (has_large_page(mm, start, end)) { |
218 | local_flush_tlb(); | 222 | local_flush_tlb(); |
219 | goto flush_all; | 223 | goto flush_all; |
220 | } | 224 | } |
221 | /* flush range by one by one 'invlpg' */ | 225 | /* flush range by one by one 'invlpg' */ |
222 | for (addr = start; addr < end; addr += PAGE_SIZE) | 226 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
227 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | ||
223 | __flush_tlb_single(addr); | 228 | __flush_tlb_single(addr); |
229 | } | ||
224 | 230 | ||
225 | if (cpumask_any_but(mm_cpumask(mm), | 231 | if (cpumask_any_but(mm_cpumask(mm), |
226 | smp_processor_id()) < nr_cpu_ids) | 232 | smp_processor_id()) < nr_cpu_ids) |
@@ -256,6 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) | |||
256 | 262 | ||
257 | static void do_flush_tlb_all(void *info) | 263 | static void do_flush_tlb_all(void *info) |
258 | { | 264 | { |
265 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | ||
259 | __flush_tlb_all(); | 266 | __flush_tlb_all(); |
260 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) | 267 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) |
261 | leave_mm(smp_processor_id()); | 268 | leave_mm(smp_processor_id()); |
@@ -263,6 +270,7 @@ static void do_flush_tlb_all(void *info) | |||
263 | 270 | ||
264 | void flush_tlb_all(void) | 271 | void flush_tlb_all(void) |
265 | { | 272 | { |
273 | count_vm_event(NR_TLB_REMOTE_FLUSH); | ||
266 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 274 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
267 | } | 275 | } |
268 | 276 | ||
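Editor's note: the tlb.c hunks above only add accounting; each local, remote-sent and remote-received TLB flush now bumps a VM event counter that is summed into /proc/vmstat. Conceptually, count_vm_event() is just a per-CPU increment, roughly as in this simplified sketch (based on include/linux/vmstat.h):

#include <linux/vmstat.h>	/* enum vm_event_item, vm_event_states */

/* Simplified view of count_vm_event(): bump this CPU's slot for the
 * given event; readers later sum the per-CPU arrays for /proc/vmstat. */
static inline void example_count_vm_event(enum vm_event_item item)
{
	this_cpu_inc(vm_event_states.event[item]);
}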
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 48768df2471a..6890d8498e0b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -403,7 +403,7 @@ static void nmi_cpu_down(void *dummy) | |||
403 | nmi_cpu_shutdown(dummy); | 403 | nmi_cpu_shutdown(dummy); |
404 | } | 404 | } |
405 | 405 | ||
406 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 406 | static int nmi_create_files(struct dentry *root) |
407 | { | 407 | { |
408 | unsigned int i; | 408 | unsigned int i; |
409 | 409 | ||
@@ -420,14 +420,14 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
420 | continue; | 420 | continue; |
421 | 421 | ||
422 | snprintf(buf, sizeof(buf), "%d", i); | 422 | snprintf(buf, sizeof(buf), "%d", i); |
423 | dir = oprofilefs_mkdir(sb, root, buf); | 423 | dir = oprofilefs_mkdir(root, buf); |
424 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | 424 | oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled); |
425 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); | 425 | oprofilefs_create_ulong(dir, "event", &counter_config[i].event); |
426 | oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); | 426 | oprofilefs_create_ulong(dir, "count", &counter_config[i].count); |
427 | oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); | 427 | oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask); |
428 | oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); | 428 | oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel); |
429 | oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); | 429 | oprofilefs_create_ulong(dir, "user", &counter_config[i].user); |
430 | oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra); | 430 | oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra); |
431 | } | 431 | } |
432 | 432 | ||
433 | return 0; | 433 | return 0; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index b2b94438ff05..50d86c0e9ba4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -454,16 +454,16 @@ static void init_ibs(void) | |||
454 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); | 454 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); |
455 | } | 455 | } |
456 | 456 | ||
457 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 457 | static int (*create_arch_files)(struct dentry *root); |
458 | 458 | ||
459 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) | 459 | static int setup_ibs_files(struct dentry *root) |
460 | { | 460 | { |
461 | struct dentry *dir; | 461 | struct dentry *dir; |
462 | int ret = 0; | 462 | int ret = 0; |
463 | 463 | ||
464 | /* architecture specific files */ | 464 | /* architecture specific files */ |
465 | if (create_arch_files) | 465 | if (create_arch_files) |
466 | ret = create_arch_files(sb, root); | 466 | ret = create_arch_files(root); |
467 | 467 | ||
468 | if (ret) | 468 | if (ret) |
469 | return ret; | 469 | return ret; |
@@ -479,26 +479,26 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
479 | ibs_config.max_cnt_op = 250000; | 479 | ibs_config.max_cnt_op = 250000; |
480 | 480 | ||
481 | if (ibs_caps & IBS_CAPS_FETCHSAM) { | 481 | if (ibs_caps & IBS_CAPS_FETCHSAM) { |
482 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | 482 | dir = oprofilefs_mkdir(root, "ibs_fetch"); |
483 | oprofilefs_create_ulong(sb, dir, "enable", | 483 | oprofilefs_create_ulong(dir, "enable", |
484 | &ibs_config.fetch_enabled); | 484 | &ibs_config.fetch_enabled); |
485 | oprofilefs_create_ulong(sb, dir, "max_count", | 485 | oprofilefs_create_ulong(dir, "max_count", |
486 | &ibs_config.max_cnt_fetch); | 486 | &ibs_config.max_cnt_fetch); |
487 | oprofilefs_create_ulong(sb, dir, "rand_enable", | 487 | oprofilefs_create_ulong(dir, "rand_enable", |
488 | &ibs_config.rand_en); | 488 | &ibs_config.rand_en); |
489 | } | 489 | } |
490 | 490 | ||
491 | if (ibs_caps & IBS_CAPS_OPSAM) { | 491 | if (ibs_caps & IBS_CAPS_OPSAM) { |
492 | dir = oprofilefs_mkdir(sb, root, "ibs_op"); | 492 | dir = oprofilefs_mkdir(root, "ibs_op"); |
493 | oprofilefs_create_ulong(sb, dir, "enable", | 493 | oprofilefs_create_ulong(dir, "enable", |
494 | &ibs_config.op_enabled); | 494 | &ibs_config.op_enabled); |
495 | oprofilefs_create_ulong(sb, dir, "max_count", | 495 | oprofilefs_create_ulong(dir, "max_count", |
496 | &ibs_config.max_cnt_op); | 496 | &ibs_config.max_cnt_op); |
497 | if (ibs_caps & IBS_CAPS_OPCNT) | 497 | if (ibs_caps & IBS_CAPS_OPCNT) |
498 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 498 | oprofilefs_create_ulong(dir, "dispatched_ops", |
499 | &ibs_config.dispatched_ops); | 499 | &ibs_config.dispatched_ops); |
500 | if (ibs_caps & IBS_CAPS_BRNTRGT) | 500 | if (ibs_caps & IBS_CAPS_BRNTRGT) |
501 | oprofilefs_create_ulong(sb, dir, "branch_target", | 501 | oprofilefs_create_ulong(dir, "branch_target", |
502 | &ibs_config.branch_target); | 502 | &ibs_config.branch_target); |
503 | } | 503 | } |
504 | 504 | ||
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 47fe66fe61f1..3ca5957b7a34 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -20,7 +20,7 @@ | |||
20 | #include <linux/intel_pmic_gpio.h> | 20 | #include <linux/intel_pmic_gpio.h> |
21 | #include <linux/spi/spi.h> | 21 | #include <linux/spi/spi.h> |
22 | #include <linux/i2c.h> | 22 | #include <linux/i2c.h> |
23 | #include <linux/i2c/pca953x.h> | 23 | #include <linux/platform_data/pca953x.h> |
24 | #include <linux/gpio_keys.h> | 24 | #include <linux/gpio_keys.h> |
25 | #include <linux/input.h> | 25 | #include <linux/input.h> |
26 | #include <linux/platform_device.h> | 26 | #include <linux/platform_device.h> |
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c index 9d34eddb517f..96eb2bd28832 100644 --- a/arch/x86/um/os-Linux/prctl.c +++ b/arch/x86/um/os-Linux/prctl.c | |||
@@ -4,7 +4,7 @@ | |||
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <sys/ptrace.h> | 6 | #include <sys/ptrace.h> |
7 | #include <linux/ptrace.h> | 7 | #include <asm/ptrace.h> |
8 | 8 | ||
9 | int os_arch_prctl(int pid, int code, unsigned long *addr) | 9 | int os_arch_prctl(int pid, int code, unsigned long *addr) |
10 | { | 10 | { |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index c74436e687bf..72074d528400 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -85,15 +85,18 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
85 | cycle_t ret; | 85 | cycle_t ret; |
86 | u64 last; | 86 | u64 last; |
87 | u32 version; | 87 | u32 version; |
88 | u32 migrate_count; | ||
89 | u8 flags; | 88 | u8 flags; |
90 | unsigned cpu, cpu1; | 89 | unsigned cpu, cpu1; |
91 | 90 | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * When looping to get a consistent (time-info, tsc) pair, we | 93 | * Note: hypervisor must guarantee that: |
95 | * also need to deal with the possibility we can switch vcpus, | 94 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. |
96 | * so make sure we always re-fetch time-info for the current vcpu. | 95 | * 2. that per-CPU pvclock time info is updated if the |
96 | * underlying CPU changes. | ||
97 | * 3. that version is increased whenever underlying CPU | ||
98 | * changes. | ||
99 | * | ||
97 | */ | 100 | */ |
98 | do { | 101 | do { |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | 102 | cpu = __getcpu() & VGETCPU_CPU_MASK; |
@@ -104,8 +107,6 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
104 | 107 | ||
105 | pvti = get_pvti(cpu); | 108 | pvti = get_pvti(cpu); |
106 | 109 | ||
107 | migrate_count = pvti->migrate_count; | ||
108 | |||
109 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); | 110 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
110 | 111 | ||
111 | /* | 112 | /* |
@@ -117,8 +118,7 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
117 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | 118 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; |
118 | } while (unlikely(cpu != cpu1 || | 119 | } while (unlikely(cpu != cpu1 || |
119 | (pvti->pvti.version & 1) || | 120 | (pvti->pvti.version & 1) || |
120 | pvti->pvti.version != version || | 121 | pvti->pvti.version != version)); |
121 | pvti->migrate_count != migrate_count)); | ||
122 | 122 | ||
123 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | 123 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) |
124 | *mode = VCLOCK_NONE; | 124 | *mode = VCLOCK_NONE; |
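Editor's note: the vclock_gettime.c hunk above drops the separate migrate_count re-check because the hypervisor is now required to bump the per-CPU pvclock version whenever the backing CPU changes, so the plain version-retry loop suffices. The following is a minimal sketch of that retry pattern, simplified from the vDSO read path; read_tsc() and scale_delta() stand in for the real arch helpers (native_read_tsc(), pvclock_scale_delta()), and the barriers are reduced to rmb() for brevity:

#include <asm/pvclock-abi.h>	/* struct pvclock_vcpu_time_info */

/* Seqcount-style read of a pvclock time info record (sketch only). */
static u64 example_pvclock_read(const volatile struct pvclock_vcpu_time_info *pvti)
{
	u32 version;
	u64 ns;

	do {
		version = pvti->version;	/* odd => update in flight */
		rmb();
		ns = pvti->system_time +
		     scale_delta(read_tsc() - pvti->tsc_timestamp,
				 pvti->tsc_to_system_mul, pvti->tsc_shift);
		rmb();
	} while ((version & 1) || version != pvti->version);

	return ns;
}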
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2fcaedc0b739..fa6ade76ef3f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void) | |||
427 | 427 | ||
428 | if (!xen_initial_domain()) | 428 | if (!xen_initial_domain()) |
429 | cpuid_leaf1_edx_mask &= | 429 | cpuid_leaf1_edx_mask &= |
430 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ | 430 | ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ |
431 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | ||
432 | 431 | ||
433 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); | 432 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); |
434 | 433 | ||
@@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
735 | addr = (unsigned long)xen_int3; | 734 | addr = (unsigned long)xen_int3; |
736 | else if (addr == (unsigned long)stack_segment) | 735 | else if (addr == (unsigned long)stack_segment) |
737 | addr = (unsigned long)xen_stack_segment; | 736 | addr = (unsigned long)xen_stack_segment; |
738 | else if (addr == (unsigned long)double_fault || | 737 | else if (addr == (unsigned long)double_fault) { |
739 | addr == (unsigned long)nmi) { | ||
740 | /* Don't need to handle these */ | 738 | /* Don't need to handle these */ |
741 | return 0; | 739 | return 0; |
742 | #ifdef CONFIG_X86_MCE | 740 | #ifdef CONFIG_X86_MCE |
@@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
747 | */ | 745 | */ |
748 | ; | 746 | ; |
749 | #endif | 747 | #endif |
750 | } else { | 748 | } else if (addr == (unsigned long)nmi) |
749 | /* | ||
750 | * Use the native version as well. | ||
751 | */ | ||
752 | ; | ||
753 | else { | ||
751 | /* Some other trap using IST? */ | 754 | /* Some other trap using IST? */ |
752 | if (WARN_ON(val->ist != 0)) | 755 | if (WARN_ON(val->ist != 0)) |
753 | return 0; | 756 | return 0; |
@@ -1689,7 +1692,6 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action, | |||
1689 | case CPU_UP_PREPARE: | 1692 | case CPU_UP_PREPARE: |
1690 | xen_vcpu_setup(cpu); | 1693 | xen_vcpu_setup(cpu); |
1691 | if (xen_have_vector_callback) { | 1694 | if (xen_have_vector_callback) { |
1692 | xen_init_lock_cpu(cpu); | ||
1693 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) | 1695 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) |
1694 | xen_setup_timer(cpu); | 1696 | xen_setup_timer(cpu); |
1695 | } | 1697 | } |
@@ -1710,6 +1712,8 @@ static void __init xen_hvm_guest_init(void) | |||
1710 | 1712 | ||
1711 | xen_hvm_init_shared_info(); | 1713 | xen_hvm_init_shared_info(); |
1712 | 1714 | ||
1715 | xen_panic_handler_init(); | ||
1716 | |||
1713 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1717 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1714 | xen_have_vector_callback = 1; | 1718 | xen_have_vector_callback = 1; |
1715 | xen_hvm_smp_init(); | 1719 | xen_hvm_smp_init(); |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae1..0da7f863056f 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags) | |||
47 | /* convert from IF type flag */ | 47 | /* convert from IF type flag */ |
48 | flags = !(flags & X86_EFLAGS_IF); | 48 | flags = !(flags & X86_EFLAGS_IF); |
49 | 49 | ||
50 | /* There's a one instruction preempt window here. We need to | 50 | /* See xen_irq_enable() for why preemption must be disabled. */ |
51 | make sure we're don't switch CPUs between getting the vcpu | ||
52 | pointer and updating the mask. */ | ||
53 | preempt_disable(); | 51 | preempt_disable(); |
54 | vcpu = this_cpu_read(xen_vcpu); | 52 | vcpu = this_cpu_read(xen_vcpu); |
55 | vcpu->evtchn_upcall_mask = flags; | 53 | vcpu->evtchn_upcall_mask = flags; |
56 | preempt_enable_no_resched(); | ||
57 | |||
58 | /* Doesn't matter if we get preempted here, because any | ||
59 | pending event will get dealt with anyway. */ | ||
60 | 54 | ||
61 | if (flags == 0) { | 55 | if (flags == 0) { |
62 | preempt_check_resched(); | ||
63 | barrier(); /* unmask then check (avoid races) */ | 56 | barrier(); /* unmask then check (avoid races) */ |
64 | if (unlikely(vcpu->evtchn_upcall_pending)) | 57 | if (unlikely(vcpu->evtchn_upcall_pending)) |
65 | xen_force_evtchn_callback(); | 58 | xen_force_evtchn_callback(); |
66 | } | 59 | preempt_enable(); |
60 | } else | ||
61 | preempt_enable_no_resched(); | ||
67 | } | 62 | } |
68 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); | 63 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); |
69 | 64 | ||
@@ -82,10 +77,12 @@ static void xen_irq_enable(void) | |||
82 | { | 77 | { |
83 | struct vcpu_info *vcpu; | 78 | struct vcpu_info *vcpu; |
84 | 79 | ||
85 | /* We don't need to worry about being preempted here, since | 80 | /* |
86 | either a) interrupts are disabled, so no preemption, or b) | 81 | * We may be preempted as soon as vcpu->evtchn_upcall_mask is |
87 | the caller is confused and is trying to re-enable interrupts | 82 | * cleared, so disable preemption to ensure we check for |
88 | on an indeterminate processor. */ | 83 | * events on the VCPU we are still running on. |
84 | */ | ||
85 | preempt_disable(); | ||
89 | 86 | ||
90 | vcpu = this_cpu_read(xen_vcpu); | 87 | vcpu = this_cpu_read(xen_vcpu); |
91 | vcpu->evtchn_upcall_mask = 0; | 88 | vcpu->evtchn_upcall_mask = 0; |
@@ -96,6 +93,8 @@ static void xen_irq_enable(void) | |||
96 | barrier(); /* unmask then check (avoid races) */ | 93 | barrier(); /* unmask then check (avoid races) */ |
97 | if (unlikely(vcpu->evtchn_upcall_pending)) | 94 | if (unlikely(vcpu->evtchn_upcall_pending)) |
98 | xen_force_evtchn_callback(); | 95 | xen_force_evtchn_callback(); |
96 | |||
97 | preempt_enable(); | ||
99 | } | 98 | } |
100 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); | 99 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); |
101 | 100 | ||
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927e..8b901e8d782d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -161,6 +161,7 @@ | |||
161 | #include <asm/xen/page.h> | 161 | #include <asm/xen/page.h> |
162 | #include <asm/xen/hypercall.h> | 162 | #include <asm/xen/hypercall.h> |
163 | #include <asm/xen/hypervisor.h> | 163 | #include <asm/xen/hypervisor.h> |
164 | #include <xen/balloon.h> | ||
164 | #include <xen/grant_table.h> | 165 | #include <xen/grant_table.h> |
165 | 166 | ||
166 | #include "multicalls.h" | 167 | #include "multicalls.h" |
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, | |||
967 | if (kmap_op != NULL) { | 968 | if (kmap_op != NULL) { |
968 | if (!PageHighMem(page)) { | 969 | if (!PageHighMem(page)) { |
969 | struct multicall_space mcs; | 970 | struct multicall_space mcs; |
970 | struct gnttab_unmap_grant_ref *unmap_op; | 971 | struct gnttab_unmap_and_replace *unmap_op; |
972 | struct page *scratch_page = get_balloon_scratch_page(); | ||
973 | unsigned long scratch_page_address = (unsigned long) | ||
974 | __va(page_to_pfn(scratch_page) << PAGE_SHIFT); | ||
971 | 975 | ||
972 | /* | 976 | /* |
973 | * It might be that we queued all the m2p grant table | 977 | * It might be that we queued all the m2p grant table |
@@ -986,25 +990,31 @@ int m2p_remove_override(struct page *page, | |||
986 | printk(KERN_WARNING "m2p_remove_override: " | 990 | printk(KERN_WARNING "m2p_remove_override: " |
987 | "pfn %lx mfn %lx, failed to modify kernel mappings", | 991 | "pfn %lx mfn %lx, failed to modify kernel mappings", |
988 | pfn, mfn); | 992 | pfn, mfn); |
993 | put_balloon_scratch_page(); | ||
989 | return -1; | 994 | return -1; |
990 | } | 995 | } |
991 | 996 | ||
992 | mcs = xen_mc_entry( | 997 | xen_mc_batch(); |
993 | sizeof(struct gnttab_unmap_grant_ref)); | 998 | |
999 | mcs = __xen_mc_entry( | ||
1000 | sizeof(struct gnttab_unmap_and_replace)); | ||
994 | unmap_op = mcs.args; | 1001 | unmap_op = mcs.args; |
995 | unmap_op->host_addr = kmap_op->host_addr; | 1002 | unmap_op->host_addr = kmap_op->host_addr; |
1003 | unmap_op->new_addr = scratch_page_address; | ||
996 | unmap_op->handle = kmap_op->handle; | 1004 | unmap_op->handle = kmap_op->handle; |
997 | unmap_op->dev_bus_addr = 0; | ||
998 | 1005 | ||
999 | MULTI_grant_table_op(mcs.mc, | 1006 | MULTI_grant_table_op(mcs.mc, |
1000 | GNTTABOP_unmap_grant_ref, unmap_op, 1); | 1007 | GNTTABOP_unmap_and_replace, unmap_op, 1); |
1008 | |||
1009 | mcs = __xen_mc_entry(0); | ||
1010 | MULTI_update_va_mapping(mcs.mc, scratch_page_address, | ||
1011 | pfn_pte(page_to_pfn(scratch_page), | ||
1012 | PAGE_KERNEL_RO), 0); | ||
1001 | 1013 | ||
1002 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 1014 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
1003 | 1015 | ||
1004 | set_pte_at(&init_mm, address, ptep, | ||
1005 | pfn_pte(pfn, PAGE_KERNEL)); | ||
1006 | __flush_tlb_single(address); | ||
1007 | kmap_op->host_addr = 0; | 1016 | kmap_op->host_addr = 0; |
1017 | put_balloon_scratch_page(); | ||
1008 | } | 1018 | } |
1009 | } | 1019 | } |
1010 | 1020 | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8f3eea6b80c5..09f3059cb00b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -33,6 +33,9 @@ | |||
33 | /* These are code, but not functions. Defined in entry.S */ | 33 | /* These are code, but not functions. Defined in entry.S */ |
34 | extern const char xen_hypervisor_callback[]; | 34 | extern const char xen_hypervisor_callback[]; |
35 | extern const char xen_failsafe_callback[]; | 35 | extern const char xen_failsafe_callback[]; |
36 | #ifdef CONFIG_X86_64 | ||
37 | extern const char nmi[]; | ||
38 | #endif | ||
36 | extern void xen_sysenter_target(void); | 39 | extern void xen_sysenter_target(void); |
37 | extern void xen_syscall_target(void); | 40 | extern void xen_syscall_target(void); |
38 | extern void xen_syscall32_target(void); | 41 | extern void xen_syscall32_target(void); |
@@ -215,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk( | |||
215 | unsigned long pfn; | 218 | unsigned long pfn; |
216 | 219 | ||
217 | /* | 220 | /* |
218 | * If the PFNs are currently mapped, the VA mapping also needs | 221 | * If the PFNs are currently mapped, clear the mappings |
219 | * to be updated to be 1:1. | 222 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | ||
220 | */ | 224 | */ |
221 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) | 225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { |
226 | pte_t pte = __pte_ma(0); | ||
227 | |||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | ||
229 | pte = mfn_pte(pfn, PAGE_KERNEL_IO); | ||
230 | |||
222 | (void)HYPERVISOR_update_va_mapping( | 231 | (void)HYPERVISOR_update_va_mapping( |
223 | (unsigned long)__va(pfn << PAGE_SHIFT), | 232 | (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); |
224 | mfn_pte(pfn, PAGE_KERNEL_IO), 0); | 233 | } |
225 | 234 | ||
226 | if (start_pfn < nr_pages) | 235 | if (start_pfn < nr_pages) |
227 | *released += xen_release_chunk( | 236 | *released += xen_release_chunk( |
@@ -547,7 +556,13 @@ void xen_enable_syscall(void) | |||
547 | } | 556 | } |
548 | #endif /* CONFIG_X86_64 */ | 557 | #endif /* CONFIG_X86_64 */ |
549 | } | 558 | } |
550 | 559 | void __cpuinit xen_enable_nmi(void) | |
560 | { | ||
561 | #ifdef CONFIG_X86_64 | ||
562 | if (register_callback(CALLBACKTYPE_nmi, nmi)) | ||
563 | BUG(); | ||
564 | #endif | ||
565 | } | ||
551 | void __init xen_arch_setup(void) | 566 | void __init xen_arch_setup(void) |
552 | { | 567 | { |
553 | xen_panic_handler_init(); | 568 | xen_panic_handler_init(); |
@@ -565,7 +580,7 @@ void __init xen_arch_setup(void) | |||
565 | 580 | ||
566 | xen_enable_sysenter(); | 581 | xen_enable_sysenter(); |
567 | xen_enable_syscall(); | 582 | xen_enable_syscall(); |
568 | 583 | xen_enable_nmi(); | |
569 | #ifdef CONFIG_ACPI | 584 | #ifdef CONFIG_ACPI |
570 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
571 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 597655bd72b0..d1e4777b4e75 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -273,12 +273,20 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
273 | BUG_ON(smp_processor_id() != 0); | 273 | BUG_ON(smp_processor_id() != 0); |
274 | native_smp_prepare_boot_cpu(); | 274 | native_smp_prepare_boot_cpu(); |
275 | 275 | ||
276 | /* We've switched to the "real" per-cpu gdt, so make sure the | 276 | if (xen_pv_domain()) { |
277 | old memory can be recycled */ | 277 | /* We've switched to the "real" per-cpu gdt, so make sure the |
278 | make_lowmem_page_readwrite(xen_initial_gdt); | 278 | old memory can be recycled */ |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | ||
279 | 280 | ||
280 | xen_filter_cpu_maps(); | 281 | xen_filter_cpu_maps(); |
281 | xen_setup_vcpu_info_placement(); | 282 | xen_setup_vcpu_info_placement(); |
283 | } | ||
284 | /* | ||
285 | * The alternative logic (which patches the unlock/lock) runs before | ||
286 | * the smp bootup code is activated. Hence we need to set this up | ||
287 | * before the core kernel is patched. Otherwise we will have only | ||
288 | * modules patched but not core code. | ||
289 | */ | ||
282 | xen_init_spinlocks(); | 290 | xen_init_spinlocks(); |
283 | } | 291 | } |
284 | 292 | ||
@@ -573,6 +581,12 @@ static inline int xen_map_vector(int vector) | |||
573 | case IRQ_WORK_VECTOR: | 581 | case IRQ_WORK_VECTOR: |
574 | xen_vector = XEN_IRQ_WORK_VECTOR; | 582 | xen_vector = XEN_IRQ_WORK_VECTOR; |
575 | break; | 583 | break; |
584 | #ifdef CONFIG_X86_64 | ||
585 | case NMI_VECTOR: | ||
586 | case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */ | ||
587 | xen_vector = XEN_NMI_VECTOR; | ||
588 | break; | ||
589 | #endif | ||
576 | default: | 590 | default: |
577 | xen_vector = -1; | 591 | xen_vector = -1; |
578 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", | 592 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", |
@@ -703,6 +717,15 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
703 | WARN_ON(rc); | 717 | WARN_ON(rc); |
704 | if (!rc) | 718 | if (!rc) |
705 | rc = native_cpu_up(cpu, tidle); | 719 | rc = native_cpu_up(cpu, tidle); |
720 | |||
721 | /* | ||
722 | * We must initialize the slowpath CPU kicker _after_ the native | ||
723 | * path has executed. If we initialized it before, none of the | ||
724 | * unlocker IPI kicks would reach the booting CPU as the booting | ||
725 | * CPU had not set itself 'online' in cpu_online_mask. That mask | ||
726 | * is checked when IPIs are sent (on HVM at least). | ||
727 | */ | ||
728 | xen_init_lock_cpu(cpu); | ||
706 | return rc; | 729 | return rc; |
707 | } | 730 | } |
708 | 731 | ||
@@ -722,4 +745,5 @@ void __init xen_hvm_smp_init(void) | |||
722 | smp_ops.cpu_die = xen_hvm_cpu_die; | 745 | smp_ops.cpu_die = xen_hvm_cpu_die; |
723 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; | 746 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; |
724 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; | 747 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; |
748 | smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; | ||
725 | } | 749 | } |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 0438b9324a72..253f63fceea1 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -81,7 +81,6 @@ static inline void spin_time_accum_blocked(u64 start) | |||
81 | spinlock_stats.time_blocked += delta; | 81 | spinlock_stats.time_blocked += delta; |
82 | } | 82 | } |
83 | #else /* !CONFIG_XEN_DEBUG_FS */ | 83 | #else /* !CONFIG_XEN_DEBUG_FS */ |
84 | #define TIMEOUT (1 << 10) | ||
85 | static inline void add_stats(enum xen_contention_stat var, u32 val) | 84 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
86 | { | 85 | { |
87 | } | 86 | } |
@@ -96,23 +95,6 @@ static inline void spin_time_accum_blocked(u64 start) | |||
96 | } | 95 | } |
97 | #endif /* CONFIG_XEN_DEBUG_FS */ | 96 | #endif /* CONFIG_XEN_DEBUG_FS */ |
98 | 97 | ||
99 | /* | ||
100 | * Size struct xen_spinlock so it's the same as arch_spinlock_t. | ||
101 | */ | ||
102 | #if NR_CPUS < 256 | ||
103 | typedef u8 xen_spinners_t; | ||
104 | # define inc_spinners(xl) \ | ||
105 | asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory"); | ||
106 | # define dec_spinners(xl) \ | ||
107 | asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory"); | ||
108 | #else | ||
109 | typedef u16 xen_spinners_t; | ||
110 | # define inc_spinners(xl) \ | ||
111 | asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory"); | ||
112 | # define dec_spinners(xl) \ | ||
113 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); | ||
114 | #endif | ||
115 | |||
116 | struct xen_lock_waiting { | 98 | struct xen_lock_waiting { |
117 | struct arch_spinlock *lock; | 99 | struct arch_spinlock *lock; |
118 | __ticket_t want; | 100 | __ticket_t want; |
@@ -123,6 +105,7 @@ static DEFINE_PER_CPU(char *, irq_name); | |||
123 | static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | 105 | static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); |
124 | static cpumask_t waiting_cpus; | 106 | static cpumask_t waiting_cpus; |
125 | 107 | ||
108 | static bool xen_pvspin = true; | ||
126 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | 109 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
127 | { | 110 | { |
128 | int irq = __this_cpu_read(lock_kicker_irq); | 111 | int irq = __this_cpu_read(lock_kicker_irq); |
@@ -241,16 +224,12 @@ void xen_init_lock_cpu(int cpu) | |||
241 | int irq; | 224 | int irq; |
242 | char *name; | 225 | char *name; |
243 | 226 | ||
227 | if (!xen_pvspin) | ||
228 | return; | ||
229 | |||
244 | WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", | 230 | WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", |
245 | cpu, per_cpu(lock_kicker_irq, cpu)); | 231 | cpu, per_cpu(lock_kicker_irq, cpu)); |
246 | 232 | ||
247 | /* | ||
248 | * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23 | ||
249 | * (xen: disable PV spinlocks on HVM) | ||
250 | */ | ||
251 | if (xen_hvm_domain()) | ||
252 | return; | ||
253 | |||
254 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | 233 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); |
255 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | 234 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, |
256 | cpu, | 235 | cpu, |
@@ -270,11 +249,7 @@ void xen_init_lock_cpu(int cpu) | |||
270 | 249 | ||
271 | void xen_uninit_lock_cpu(int cpu) | 250 | void xen_uninit_lock_cpu(int cpu) |
272 | { | 251 | { |
273 | /* | 252 | if (!xen_pvspin) |
274 | * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23 | ||
275 | * (xen: disable PV spinlocks on HVM) | ||
276 | */ | ||
277 | if (xen_hvm_domain()) | ||
278 | return; | 253 | return; |
279 | 254 | ||
280 | unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); | 255 | unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); |
@@ -283,16 +258,9 @@ void xen_uninit_lock_cpu(int cpu) | |||
283 | per_cpu(irq_name, cpu) = NULL; | 258 | per_cpu(irq_name, cpu) = NULL; |
284 | } | 259 | } |
285 | 260 | ||
286 | static bool xen_pvspin __initdata = true; | ||
287 | 261 | ||
288 | void __init xen_init_spinlocks(void) | 262 | void __init xen_init_spinlocks(void) |
289 | { | 263 | { |
290 | /* | ||
291 | * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23 | ||
292 | * (xen: disable PV spinlocks on HVM) | ||
293 | */ | ||
294 | if (xen_hvm_domain()) | ||
295 | return; | ||
296 | 264 | ||
297 | if (!xen_pvspin) { | 265 | if (!xen_pvspin) { |
298 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); | 266 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); |
@@ -323,6 +291,9 @@ static int __init xen_spinlock_debugfs(void) | |||
323 | if (d_xen == NULL) | 291 | if (d_xen == NULL) |
324 | return -ENOMEM; | 292 | return -ENOMEM; |
325 | 293 | ||
294 | if (!xen_pvspin) | ||
295 | return 0; | ||
296 | |||
326 | d_spin_debug = debugfs_create_dir("spinlocks", d_xen); | 297 | d_spin_debug = debugfs_create_dir("spinlocks", d_xen); |
327 | 298 | ||
328 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | 299 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); |
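Editor's note: with the TIMEOUT fallback and the byte/word spinner counters removed, xen_pvspin becomes an ordinary runtime flag consulted by xen_init_lock_cpu(), xen_uninit_lock_cpu() and the debugfs setup above. The hook that flips it from the kernel command line is outside the hunks shown here; a minimal sketch of such a hook, assuming the conventional "xen_nopvspin" parameter name, would be:

#include <linux/init.h>		/* early_param(), __init */

/* Sketch only: disable PV ticket spinlocks from the command line.
 * The parameter name is an assumption; xen_pvspin is the flag
 * introduced in the spinlock.c hunk above. */
static __init int xen_parse_nopvspin(char *arg)
{
	xen_pvspin = false;
	return 0;
}
early_param("xen_nopvspin", xen_parse_nopvspin);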