 tools/testing/selftests/Makefile                    |    1 +
 tools/testing/selftests/kvm/Makefile                |   38 +
 tools/testing/selftests/kvm/include/kvm_util.h      |  139 +
 tools/testing/selftests/kvm/include/sparsebit.h     |   75 +
 tools/testing/selftests/kvm/include/test_util.h     |   45 +
 tools/testing/selftests/kvm/include/x86.h           | 1043 +
 tools/testing/selftests/kvm/lib/assert.c            |   87 +
 tools/testing/selftests/kvm/lib/kvm_util.c          | 1480 +
 tools/testing/selftests/kvm/lib/kvm_util_internal.h |   67 +
 tools/testing/selftests/kvm/lib/sparsebit.c         | 2087 +
 tools/testing/selftests/kvm/lib/x86.c               |  697 +
 tools/testing/selftests/kvm/set_sregs_test.c        |   54 +
 12 files changed, 5813 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 7442dfb73b7f..c98f1b874582 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -14,6 +14,7 @@ TARGETS += gpio
 TARGETS += intel_pstate
 TARGETS += ipc
 TARGETS += kcmp
+TARGETS += kvm
 TARGETS += lib
 TARGETS += membarrier
 TARGETS += memfd
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000000..6aade26e9ca2
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,38 @@
all:

top_srcdir = ../../../../
UNAME_M := $(shell uname -m)

LIBKVM = lib/assert.c lib/kvm_util.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86.c

TEST_GEN_PROGS_x86_64 = set_sregs_test

TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))

INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)

# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk

STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)

x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@

$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
	$(AR) crs $@ $^

$(LINUX_HDR_PATH):
	make -C $(top_srcdir) headers_install

all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
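
A quick note on driving this Makefile, since the patch itself doesn't spell out the invocation: the standard selftests entry points apply. A sketch, assuming an x86_64 host and a configured kernel source tree:

	make -C tools/testing/selftests TARGETS=kvm	# builds libkvm.a and set_sregs_test
	./tools/testing/selftests/kvm/set_sregs_test	# run a single test directly

The order-only prerequisite on $(LINUX_HDR_PATH) above is what triggers "make headers_install" on a fresh tree, so the library and tests compile against the tree's exported UAPI headers rather than the system copies.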
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000000..d2e3e23bfbd3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,139 @@
/*
 * tools/testing/selftests/kvm/include/kvm_util.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 */
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H 1

#include "test_util.h"

#include "asm/kvm.h"
#include "linux/kvm.h"
#include <sys/ioctl.h>

#include "sparsebit.h"

/*
 * Memslots can't cover the gfn starting at this gpa; otherwise vCPUs
 * can't be created. Only applies to VMs using EPT.
 */
#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul

/* Callers of kvm_util only have an incomplete/opaque description of the
 * structure kvm_util is using to maintain the state of a VM.
 */
struct kvm_vm;

typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */

/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000

#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
#define DEFAULT_STACK_PGS 5

enum vm_guest_mode {
	VM_MODE_FLAT48PG,
};

enum vm_mem_backing_src_type {
	VM_MEM_SRC_ANONYMOUS,
	VM_MEM_SRC_ANONYMOUS_THP,
	VM_MEM_SRC_ANONYMOUS_HUGETLB,
};

int kvm_check_cap(long cap);

struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);

int kvm_memcmp_hva_gva(void *hva,
	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);

void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void vcpu_dump(FILE *stream, struct kvm_vm *vm,
	uint32_t vcpuid, uint8_t indent);

void vm_create_irqchip(struct kvm_vm *vm);

void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags);

void vcpu_ioctl(struct kvm_vm *vm,
	uint32_t vcpuid, unsigned long ioctl, void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
	uint32_t data_memslot, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);

struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
	struct kvm_mp_state *mp_state);
void vcpu_regs_get(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
void vcpu_sregs_get(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_sregs *sregs);
void vcpu_sregs_set(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_sregs *sregs);
int _vcpu_sregs_set(struct kvm_vm *vm,
	uint32_t vcpuid, struct kvm_sregs *sregs);
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
	struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
	struct kvm_vcpu_events *events);

const char *exit_reason_str(unsigned int exit_reason);

void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	uint32_t pgd_memslot);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
	vm_paddr_t paddr_min, uint32_t memslot);

void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid);
void vcpu_set_cpuid(
	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);

struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
struct kvm_cpuid_entry2 *
find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
	uint32_t index);

static inline struct kvm_cpuid_entry2 *
find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function)
{
	return find_cpuid_index_entry(cpuid, function, 0);
}

struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);

struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
	uint64_t end);

struct kvm_dirty_log *
allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);

int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);

#endif /* SELFTEST_KVM_UTIL_H */
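
To make the API above concrete, here is a condensed sketch of a complete test written against it, closely following the set_sregs_test.c that this patch adds (the specific invalid APIC-base value is illustrative):

#include "test_util.h"
#include "kvm_util.h"

#define VCPU_ID 5

int main(int argc, char *argv[])
{
	struct kvm_sregs sregs;
	struct kvm_vm *vm;

	/* Create a default-mode VM with one vCPU; the guest never runs here. */
	vm = vm_create_default(VCPU_ID, NULL);

	/* Read the special registers, corrupt one, and expect KVM to reject it. */
	vcpu_sregs_get(vm, VCPU_ID, &sregs);
	sregs.apic_base = 1 << 10;	/* not a valid APIC base */
	TEST_ASSERT(_vcpu_sregs_set(vm, VCPU_ID, &sregs) != 0,
		    "KVM accepted an invalid APIC base");

	kvm_vm_free(vm);
	return 0;
}

Note the underscore convention visible in the prototypes: vcpu_sregs_set() asserts that the underlying ioctl succeeded, while _vcpu_sregs_set() returns the raw result so a test can assert that an operation fails.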
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000000..54cfeb6568d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
/*
 * tools/testing/selftests/kvm/include/sparsebit.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 *
 * Header file that describes API to the sparsebit library.
 * This library provides a memory efficient means of storing
 * the settings of bits indexed via a uint64_t. Memory usage
 * is reasonable, significantly less than (2^64 / 8) bytes, as
 * long as bits that are mostly set or mostly cleared are close
 * to each other. This library is efficient in memory usage
 * even in the case where most bits are set.
 */

#ifndef _TEST_SPARSEBIT_H_
#define _TEST_SPARSEBIT_H_

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

struct sparsebit;
typedef uint64_t sparsebit_idx_t;
typedef uint64_t sparsebit_num_t;

struct sparsebit *sparsebit_alloc(void);
void sparsebit_free(struct sparsebit **sbitp);
void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);

bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
bool sparsebit_is_set_num(struct sparsebit *sbit,
	sparsebit_idx_t idx, sparsebit_num_t num);
bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
bool sparsebit_is_clear_num(struct sparsebit *sbit,
	sparsebit_idx_t idx, sparsebit_num_t num);
sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
bool sparsebit_any_set(struct sparsebit *sbit);
bool sparsebit_any_clear(struct sparsebit *sbit);
bool sparsebit_all_set(struct sparsebit *sbit);
bool sparsebit_all_clear(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
	sparsebit_idx_t start, sparsebit_num_t num);
sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
	sparsebit_idx_t start, sparsebit_num_t num);

void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
	sparsebit_num_t num);
void sparsebit_set_all(struct sparsebit *sbitp);

void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
void sparsebit_clear_num(struct sparsebit *sbitp,
	sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_clear_all(struct sparsebit *sbitp);

void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
	unsigned int indent);
void sparsebit_validate_internal(struct sparsebit *sbit);

#ifdef __cplusplus
}
#endif

#endif /* _TEST_SPARSEBIT_H_ */
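
A short sketch of typical sparsebit usage; the page-tracking framing is illustrative, though it matches how kvm_util employs the library internally to track guest physical and virtual pages:

#include <assert.h>

#include "sparsebit.h"

int main(void)
{
	struct sparsebit *used_pfns = sparsebit_alloc();

	sparsebit_set_num(used_pfns, 2, 4);		/* mark pfns 2..5 as used */
	assert(sparsebit_is_set(used_pfns, 3));
	assert(sparsebit_is_clear(used_pfns, 6));
	assert(sparsebit_first_clear(used_pfns) == 0);	/* pfn 0 is still free */
	assert(sparsebit_next_clear(used_pfns, 2) == 6);	/* next free pfn after 2 */

	sparsebit_free(&used_pfns);	/* frees the structure and NULLs the pointer */
	return 0;
}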
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000000..7ab98e41324f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,45 @@
/*
 * tools/testing/selftests/kvm/include/test_util.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 */

#ifndef TEST_UTIL_H
#define TEST_UTIL_H 1

#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>

ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);

void test_assert(bool exp, const char *exp_str,
	const char *file, unsigned int line, const char *fmt, ...);

#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

#define TEST_ASSERT(e, fmt, ...) \
	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#define ASSERT_EQ(a, b) do { \
	typeof(a) __a = (a); \
	typeof(b) __b = (b); \
	TEST_ASSERT(__a == __b, \
		    "ASSERT_EQ(%s, %s) failed.\n" \
		    "\t%s is %#lx\n" \
		    "\t%s is %#lx", \
		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)

#endif /* TEST_UTIL_H */
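
TEST_ASSERT() is the framework's workhorse: on failure it prints the stringified expression, the file and line, and a printf-style message, then dumps a stack trace (see lib/assert.c below). A minimal sketch of its use, assuming the test is linked against libkvm.a for test_assert(); the /dev/kvm probe itself is illustrative:

#include "test_util.h"

int main(void)
{
	int vals[4] = { 0 };
	int fd = open("/dev/kvm", O_RDWR);

	TEST_ASSERT(fd >= 0, "open(/dev/kvm) failed, errno: %i", errno);
	ASSERT_EQ(ARRAY_SIZE(vals), 4);	/* both sides are cast to unsigned long */

	close(fd);
	return 0;
}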
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000000..4a5b2c4c1a0f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1043 @@
/*
 * tools/testing/selftests/kvm/include/x86.h
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 */

#ifndef SELFTEST_KVM_X86_H
#define SELFTEST_KVM_X86_H

#include <assert.h>
#include <stdint.h>

#define X86_EFLAGS_FIXED (1u << 1)

#define X86_CR4_VME (1ul << 0)
#define X86_CR4_PVI (1ul << 1)
#define X86_CR4_TSD (1ul << 2)
#define X86_CR4_DE (1ul << 3)
#define X86_CR4_PSE (1ul << 4)
#define X86_CR4_PAE (1ul << 5)
#define X86_CR4_MCE (1ul << 6)
#define X86_CR4_PGE (1ul << 7)
#define X86_CR4_PCE (1ul << 8)
#define X86_CR4_OSFXSR (1ul << 9)
#define X86_CR4_OSXMMEXCPT (1ul << 10)
#define X86_CR4_UMIP (1ul << 11)
#define X86_CR4_VMXE (1ul << 13)
#define X86_CR4_SMXE (1ul << 14)
#define X86_CR4_FSGSBASE (1ul << 16)
#define X86_CR4_PCIDE (1ul << 17)
#define X86_CR4_OSXSAVE (1ul << 18)
#define X86_CR4_SMEP (1ul << 20)
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)

/* The enum values match the instruction encoding of each register */
enum x86_register {
	RAX = 0,
	RCX,
	RDX,
	RBX,
	RSP,
	RBP,
	RSI,
	RDI,
	R8,
	R9,
	R10,
	R11,
	R12,
	R13,
	R14,
	R15,
};

struct desc64 {
	uint16_t limit0;
	uint16_t base0;
	unsigned base1:8, type:5, dpl:2, p:1;
	unsigned limit1:4, zero0:3, g:1, base2:8;
	uint32_t base3;
	uint32_t zero1;
} __attribute__((packed));

struct desc_ptr {
	uint16_t size;
	uint64_t address;
} __attribute__((packed));

static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
	return ((uint64_t)desc->base3 << 32) |
		(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
}

static inline uint64_t rdtsc(void)
{
	uint32_t eax, edx;

	/*
	 * The lfence is to wait (on Intel CPUs) until all previous
	 * instructions have been executed.
	 */
	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
	return ((uint64_t)edx) << 32 | eax;
}

static inline uint64_t rdtscp(uint32_t *aux)
{
	uint32_t eax, edx;

	__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
	return ((uint64_t)edx) << 32 | eax;
}

static inline uint64_t rdmsr(uint32_t msr)
{
	uint32_t a, d;

	__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");

	return a | ((uint64_t) d << 32);
}

static inline void wrmsr(uint32_t msr, uint64_t value)
{
	uint32_t a = value;
	uint32_t d = value >> 32;

	__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
}


static inline uint16_t inw(uint16_t port)
{
	uint16_t tmp;

	__asm__ __volatile__("in %%dx, %%ax"
		: /* output */ "=a" (tmp)
		: /* input */ "d" (port));

	return tmp;
}

static inline uint16_t get_es(void)
{
	uint16_t es;

	__asm__ __volatile__("mov %%es, %[es]"
		: /* output */ [es]"=rm"(es));
	return es;
}

static inline uint16_t get_cs(void)
{
	uint16_t cs;

	__asm__ __volatile__("mov %%cs, %[cs]"
		: /* output */ [cs]"=rm"(cs));
	return cs;
}

static inline uint16_t get_ss(void)
{
	uint16_t ss;

	__asm__ __volatile__("mov %%ss, %[ss]"
		: /* output */ [ss]"=rm"(ss));
	return ss;
}

static inline uint16_t get_ds(void)
{
	uint16_t ds;

	__asm__ __volatile__("mov %%ds, %[ds]"
		: /* output */ [ds]"=rm"(ds));
	return ds;
}

static inline uint16_t get_fs(void)
{
	uint16_t fs;

	__asm__ __volatile__("mov %%fs, %[fs]"
		: /* output */ [fs]"=rm"(fs));
	return fs;
}

static inline uint16_t get_gs(void)
{
	uint16_t gs;

	__asm__ __volatile__("mov %%gs, %[gs]"
		: /* output */ [gs]"=rm"(gs));
	return gs;
}

static inline uint16_t get_tr(void)
{
	uint16_t tr;

	__asm__ __volatile__("str %[tr]"
		: /* output */ [tr]"=rm"(tr));
	return tr;
}

static inline uint64_t get_cr0(void)
{
	uint64_t cr0;

	__asm__ __volatile__("mov %%cr0, %[cr0]"
		: /* output */ [cr0]"=r"(cr0));
	return cr0;
}

static inline uint64_t get_cr3(void)
{
	uint64_t cr3;

	__asm__ __volatile__("mov %%cr3, %[cr3]"
		: /* output */ [cr3]"=r"(cr3));
	return cr3;
}

static inline uint64_t get_cr4(void)
{
	uint64_t cr4;

	__asm__ __volatile__("mov %%cr4, %[cr4]"
		: /* output */ [cr4]"=r"(cr4));
	return cr4;
}

static inline void set_cr4(uint64_t val)
{
	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}

static inline uint64_t get_gdt_base(void)
{
	struct desc_ptr gdt;
	__asm__ __volatile__("sgdt %[gdt]"
		: /* output */ [gdt]"=m"(gdt));
	return gdt.address;
}

static inline uint64_t get_idt_base(void)
{
	struct desc_ptr idt;
	__asm__ __volatile__("sidt %[idt]"
		: /* output */ [idt]"=m"(idt));
	return idt.address;
}

#define SET_XMM(__var, __xmm) \
	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)

static inline void set_xmm(int n, unsigned long val)
{
	switch (n) {
	case 0:
		SET_XMM(val, xmm0);
		break;
	case 1:
		SET_XMM(val, xmm1);
		break;
	case 2:
		SET_XMM(val, xmm2);
		break;
	case 3:
		SET_XMM(val, xmm3);
		break;
	case 4:
		SET_XMM(val, xmm4);
		break;
	case 5:
		SET_XMM(val, xmm5);
		break;
	case 6:
		SET_XMM(val, xmm6);
		break;
	case 7:
		SET_XMM(val, xmm7);
		break;
	}
}

typedef unsigned long v1di __attribute__ ((vector_size (8)));
static inline unsigned long get_xmm(int n)
{
	assert(n >= 0 && n <= 7);

	register v1di xmm0 __asm__("%xmm0");
	register v1di xmm1 __asm__("%xmm1");
	register v1di xmm2 __asm__("%xmm2");
	register v1di xmm3 __asm__("%xmm3");
	register v1di xmm4 __asm__("%xmm4");
	register v1di xmm5 __asm__("%xmm5");
	register v1di xmm6 __asm__("%xmm6");
	register v1di xmm7 __asm__("%xmm7");
	switch (n) {
	case 0:
		return (unsigned long)xmm0;
	case 1:
		return (unsigned long)xmm1;
	case 2:
		return (unsigned long)xmm2;
	case 3:
		return (unsigned long)xmm3;
	case 4:
		return (unsigned long)xmm4;
	case 5:
		return (unsigned long)xmm5;
	case 6:
		return (unsigned long)xmm6;
	case 7:
		return (unsigned long)xmm7;
	}
	return 0;
}

/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE (1UL<<0) /* Protection Enable */
#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
#define X86_CR0_EM (1UL<<2) /* Emulation */
#define X86_CR0_TS (1UL<<3) /* Task Switched */
#define X86_CR0_ET (1UL<<4) /* Extension Type */
#define X86_CR0_NE (1UL<<5) /* Numeric Error */
#define X86_CR0_WP (1UL<<16) /* Write Protect */
#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
#define X86_CR0_NW (1UL<<29) /* Not Write-through */
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */

/*
 * CPU model specific register (MSR) numbers.
 */

/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */

/* EFER bits: */
#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
#define EFER_LME (1<<8) /* Long mode enable */
#define EFER_LMA (1<<10) /* Long mode active (read-only) */
#define EFER_NX (1<<11) /* No execute enable */
#define EFER_SVME (1<<12) /* Enable virtualization */
#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */

/* Intel MSRs. Some also available on other CPUs */

#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f

#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)

#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)

#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e

#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176

#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
#define MSR_IA32_MCG_EXT_CTL 0x000004d0

#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af

#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060

#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
#define LBR_INFO_CYCLES 0xffff

#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6

#define MSR_IA32_RTIT_CTL 0x00000570
#define MSR_IA32_RTIT_STATUS 0x00000571
#define MSR_IA32_RTIT_ADDR0_A 0x00000580
#define MSR_IA32_RTIT_ADDR0_B 0x00000581
#define MSR_IA32_RTIT_ADDR1_A 0x00000582
#define MSR_IA32_RTIT_ADDR1_B 0x00000583
#define MSR_IA32_RTIT_ADDR2_A 0x00000584
#define MSR_IA32_RTIT_ADDR2_B 0x00000585
#define MSR_IA32_RTIT_ADDR3_A 0x00000586
#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561

#define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258
#define MSR_MTRRfix16K_A0000 0x00000259
#define MSR_MTRRfix4K_C0000 0x00000268
#define MSR_MTRRfix4K_C8000 0x00000269
#define MSR_MTRRfix4K_D0000 0x0000026a
#define MSR_MTRRfix4K_D8000 0x0000026b
#define MSR_MTRRfix4K_E0000 0x0000026c
#define MSR_MTRRfix4K_E8000 0x0000026d
#define MSR_MTRRfix4K_F0000 0x0000026e
#define MSR_MTRRfix4K_F8000 0x0000026f
#define MSR_MTRRdefType 0x000002ff

#define MSR_IA32_CR_PAT 0x00000277

#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de

/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)

#define MSR_PEBS_FRONTEND 0x000003f7

#define MSR_IA32_POWER_CTL 0x000001fc

#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403

/* C-state Residency Counters */
#define MSR_PKG_C3_RESIDENCY 0x000003f8
#define MSR_PKG_C6_RESIDENCY 0x000003f9
#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
#define MSR_PKG_C7_RESIDENCY 0x000003fa
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
#define MSR_PKG_C10_RESIDENCY 0x00000632

/* Interrupt Response Limit */
#define MSR_PKGC3_IRTL 0x0000060a
#define MSR_PKGC6_IRTL 0x0000060b
#define MSR_PKGC7_IRTL 0x0000060c
#define MSR_PKGC8_IRTL 0x00000633
#define MSR_PKGC9_IRTL 0x00000634
#define MSR_PKGC10_IRTL 0x00000635

/* Run Time Average Power Limiting (RAPL) Interface */

#define MSR_RAPL_POWER_UNIT 0x00000606

#define MSR_PKG_POWER_LIMIT 0x00000610
#define MSR_PKG_ENERGY_STATUS 0x00000611
#define MSR_PKG_PERF_STATUS 0x00000613
#define MSR_PKG_POWER_INFO 0x00000614

#define MSR_DRAM_POWER_LIMIT 0x00000618
#define MSR_DRAM_ENERGY_STATUS 0x00000619
#define MSR_DRAM_PERF_STATUS 0x0000061b
#define MSR_DRAM_POWER_INFO 0x0000061c

#define MSR_PP0_POWER_LIMIT 0x00000638
#define MSR_PP0_ENERGY_STATUS 0x00000639
#define MSR_PP0_POLICY 0x0000063a
#define MSR_PP0_PERF_STATUS 0x0000063b

#define MSR_PP1_POWER_LIMIT 0x00000640
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642

/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C

#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D

#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B

#define MSR_CORE_C1_RES 0x00000660
#define MSR_MODULE_C6_RES_MS 0x00000664

#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669

#define MSR_ATOM_CORE_RATIOS 0x0000066a
#define MSR_ATOM_CORE_VIDS 0x0000066b
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d


#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1

/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
#define MSR_PM_ENABLE 0x00000770
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777

/* CPUID.6.EAX */
#define HWP_BASE_BIT (1<<7)
#define HWP_NOTIFICATIONS_BIT (1<<8)
#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)

/* IA32_HWP_CAPABILITIES */
#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)

/* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff)
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)

/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)

/* IA32_HWP_INTERRUPT */
#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)

#define MSR_AMD64_MC0_MASK 0xc0010044

#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))

#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))

/* These are consecutive and not in the normal block of four MCE bank registers */
586 | #define MSR_IA32_MC0_CTL2 0x00000280 | ||
587 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) | ||
588 | |||
589 | #define MSR_P6_PERFCTR0 0x000000c1 | ||
590 | #define MSR_P6_PERFCTR1 0x000000c2 | ||
591 | #define MSR_P6_EVNTSEL0 0x00000186 | ||
592 | #define MSR_P6_EVNTSEL1 0x00000187 | ||
593 | |||
594 | #define MSR_KNC_PERFCTR0 0x00000020 | ||
595 | #define MSR_KNC_PERFCTR1 0x00000021 | ||
596 | #define MSR_KNC_EVNTSEL0 0x00000028 | ||
597 | #define MSR_KNC_EVNTSEL1 0x00000029 | ||
598 | |||
599 | /* Alternative perfctr range with full access. */ | ||
600 | #define MSR_IA32_PMC0 0x000004c1 | ||
601 | |||
602 | /* AMD64 MSRs. Not complete. See the architecture manual for a more | ||
603 | complete list. */ | ||
604 | |||
605 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | ||
606 | #define MSR_AMD64_TSC_RATIO 0xc0000104 | ||
607 | #define MSR_AMD64_NB_CFG 0xc001001f | ||
608 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | ||
609 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | ||
610 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 | ||
611 | #define MSR_AMD64_LS_CFG 0xc0011020 | ||
612 | #define MSR_AMD64_DC_CFG 0xc0011022 | ||
613 | #define MSR_AMD64_BU_CFG2 0xc001102a | ||
614 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | ||
615 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 | ||
616 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 | ||
617 | #define MSR_AMD64_IBSFETCH_REG_COUNT 3 | ||
618 | #define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) | ||
619 | #define MSR_AMD64_IBSOPCTL 0xc0011033 | ||
620 | #define MSR_AMD64_IBSOPRIP 0xc0011034 | ||
621 | #define MSR_AMD64_IBSOPDATA 0xc0011035 | ||
622 | #define MSR_AMD64_IBSOPDATA2 0xc0011036 | ||
623 | #define MSR_AMD64_IBSOPDATA3 0xc0011037 | ||
624 | #define MSR_AMD64_IBSDCLINAD 0xc0011038 | ||
625 | #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 | ||
626 | #define MSR_AMD64_IBSOP_REG_COUNT 7 | ||
627 | #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) | ||
628 | #define MSR_AMD64_IBSCTL 0xc001103a | ||
629 | #define MSR_AMD64_IBSBRTARGET 0xc001103b | ||
630 | #define MSR_AMD64_IBSOPDATA4 0xc001103d | ||
631 | #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ | ||
632 | #define MSR_AMD64_SEV 0xc0010131 | ||
633 | #define MSR_AMD64_SEV_ENABLED_BIT 0 | ||
634 | #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) | ||
635 | |||
636 | /* Fam 17h MSRs */ | ||
637 | #define MSR_F17H_IRPERF 0xc00000e9 | ||
638 | |||
639 | /* Fam 16h MSRs */ | ||
640 | #define MSR_F16H_L2I_PERF_CTL 0xc0010230 | ||
641 | #define MSR_F16H_L2I_PERF_CTR 0xc0010231 | ||
642 | #define MSR_F16H_DR1_ADDR_MASK 0xc0011019 | ||
643 | #define MSR_F16H_DR2_ADDR_MASK 0xc001101a | ||
644 | #define MSR_F16H_DR3_ADDR_MASK 0xc001101b | ||
645 | #define MSR_F16H_DR0_ADDR_MASK 0xc0011027 | ||
646 | |||
647 | /* Fam 15h MSRs */ | ||
648 | #define MSR_F15H_PERF_CTL 0xc0010200 | ||
649 | #define MSR_F15H_PERF_CTR 0xc0010201 | ||
650 | #define MSR_F15H_NB_PERF_CTL 0xc0010240 | ||
651 | #define MSR_F15H_NB_PERF_CTR 0xc0010241 | ||
652 | #define MSR_F15H_PTSC 0xc0010280 | ||
653 | #define MSR_F15H_IC_CFG 0xc0011021 | ||
654 | |||
655 | /* Fam 10h MSRs */ | ||
656 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 | ||
657 | #define FAM10H_MMIO_CONF_ENABLE (1<<0) | ||
658 | #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf | ||
659 | #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 | ||
660 | #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL | ||
661 | #define FAM10H_MMIO_CONF_BASE_SHIFT 20 | ||
662 | #define MSR_FAM10H_NODE_ID 0xc001100c | ||
663 | #define MSR_F10H_DECFG 0xc0011029 | ||
664 | #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 | ||
665 | #define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) | ||
666 | |||
667 | /* K8 MSRs */ | ||
668 | #define MSR_K8_TOP_MEM1 0xc001001a | ||
669 | #define MSR_K8_TOP_MEM2 0xc001001d | ||
670 | #define MSR_K8_SYSCFG 0xc0010010 | ||
671 | #define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 | ||
672 | #define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) | ||
673 | #define MSR_K8_INT_PENDING_MSG 0xc0010055 | ||
674 | /* C1E active bits in int pending message */ | ||
675 | #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 | ||
676 | #define MSR_K8_TSEG_ADDR 0xc0010112 | ||
677 | #define MSR_K8_TSEG_MASK 0xc0010113 | ||
678 | #define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ | ||
679 | #define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ | ||
680 | #define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ | ||
681 | |||
682 | /* K7 MSRs */ | ||
683 | #define MSR_K7_EVNTSEL0 0xc0010000 | ||
684 | #define MSR_K7_PERFCTR0 0xc0010004 | ||
685 | #define MSR_K7_EVNTSEL1 0xc0010001 | ||
686 | #define MSR_K7_PERFCTR1 0xc0010005 | ||
687 | #define MSR_K7_EVNTSEL2 0xc0010002 | ||
688 | #define MSR_K7_PERFCTR2 0xc0010006 | ||
689 | #define MSR_K7_EVNTSEL3 0xc0010003 | ||
690 | #define MSR_K7_PERFCTR3 0xc0010007 | ||
691 | #define MSR_K7_CLK_CTL 0xc001001b | ||
692 | #define MSR_K7_HWCR 0xc0010015 | ||
693 | #define MSR_K7_HWCR_SMMLOCK_BIT 0 | ||
694 | #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) | ||
695 | #define MSR_K7_FID_VID_CTL 0xc0010041 | ||
696 | #define MSR_K7_FID_VID_STATUS 0xc0010042 | ||
697 | |||
698 | /* K6 MSRs */ | ||
699 | #define MSR_K6_WHCR 0xc0000082 | ||
700 | #define MSR_K6_UWCCR 0xc0000085 | ||
701 | #define MSR_K6_EPMR 0xc0000086 | ||
702 | #define MSR_K6_PSOR 0xc0000087 | ||
703 | #define MSR_K6_PFIR 0xc0000088 | ||
704 | |||
705 | /* Centaur-Hauls/IDT defined MSRs. */ | ||
706 | #define MSR_IDT_FCR1 0x00000107 | ||
707 | #define MSR_IDT_FCR2 0x00000108 | ||
708 | #define MSR_IDT_FCR3 0x00000109 | ||
709 | #define MSR_IDT_FCR4 0x0000010a | ||
710 | |||
711 | #define MSR_IDT_MCR0 0x00000110 | ||
712 | #define MSR_IDT_MCR1 0x00000111 | ||
713 | #define MSR_IDT_MCR2 0x00000112 | ||
714 | #define MSR_IDT_MCR3 0x00000113 | ||
715 | #define MSR_IDT_MCR4 0x00000114 | ||
716 | #define MSR_IDT_MCR5 0x00000115 | ||
717 | #define MSR_IDT_MCR6 0x00000116 | ||
718 | #define MSR_IDT_MCR7 0x00000117 | ||
719 | #define MSR_IDT_MCR_CTRL 0x00000120 | ||
720 | |||
721 | /* VIA Cyrix defined MSRs*/ | ||
722 | #define MSR_VIA_FCR 0x00001107 | ||
723 | #define MSR_VIA_LONGHAUL 0x0000110a | ||
724 | #define MSR_VIA_RNG 0x0000110b | ||
725 | #define MSR_VIA_BCR2 0x00001147 | ||
726 | |||
727 | /* Transmeta defined MSRs */ | ||
728 | #define MSR_TMTA_LONGRUN_CTRL 0x80868010 | ||
729 | #define MSR_TMTA_LONGRUN_FLAGS 0x80868011 | ||
730 | #define MSR_TMTA_LRTI_READOUT 0x80868018 | ||
731 | #define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a | ||
732 | |||
733 | /* Intel defined MSRs. */ | ||
734 | #define MSR_IA32_P5_MC_ADDR 0x00000000 | ||
735 | #define MSR_IA32_P5_MC_TYPE 0x00000001 | ||
736 | #define MSR_IA32_TSC 0x00000010 | ||
737 | #define MSR_IA32_PLATFORM_ID 0x00000017 | ||
738 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | ||
739 | #define MSR_EBC_FREQUENCY_ID 0x0000002c | ||
740 | #define MSR_SMI_COUNT 0x00000034 | ||
741 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | ||
742 | #define MSR_IA32_TSC_ADJUST 0x0000003b | ||
743 | #define MSR_IA32_BNDCFGS 0x00000d90 | ||
744 | |||
745 | #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc | ||
746 | |||
747 | #define MSR_IA32_XSS 0x00000da0 | ||
748 | |||
749 | #define FEATURE_CONTROL_LOCKED (1<<0) | ||
750 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) | ||
751 | #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) | ||
752 | #define FEATURE_CONTROL_LMCE (1<<20) | ||
753 | |||
754 | #define MSR_IA32_APICBASE 0x0000001b | ||
755 | #define MSR_IA32_APICBASE_BSP (1<<8) | ||
756 | #define MSR_IA32_APICBASE_ENABLE (1<<11) | ||
757 | #define MSR_IA32_APICBASE_BASE (0xfffff<<12) | ||
758 | |||
759 | #define MSR_IA32_TSCDEADLINE 0x000006e0 | ||
760 | |||
761 | #define MSR_IA32_UCODE_WRITE 0x00000079 | ||
762 | #define MSR_IA32_UCODE_REV 0x0000008b | ||
763 | |||
764 | #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b | ||
765 | #define MSR_IA32_SMBASE 0x0000009e | ||
766 | |||
767 | #define MSR_IA32_PERF_STATUS 0x00000198 | ||
768 | #define MSR_IA32_PERF_CTL 0x00000199 | ||
769 | #define INTEL_PERF_CTL_MASK 0xffff | ||
770 | #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 | ||
771 | #define MSR_AMD_PERF_STATUS 0xc0010063 | ||
772 | #define MSR_AMD_PERF_CTL 0xc0010062 | ||
773 | |||
774 | #define MSR_IA32_MPERF 0x000000e7 | ||
775 | #define MSR_IA32_APERF 0x000000e8 | ||
776 | |||
777 | #define MSR_IA32_THERM_CONTROL 0x0000019a | ||
778 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b | ||
779 | |||
780 | #define THERM_INT_HIGH_ENABLE (1 << 0) | ||
781 | #define THERM_INT_LOW_ENABLE (1 << 1) | ||
782 | #define THERM_INT_PLN_ENABLE (1 << 24) | ||
783 | |||
784 | #define MSR_IA32_THERM_STATUS 0x0000019c | ||
785 | |||
786 | #define THERM_STATUS_PROCHOT (1 << 0) | ||
787 | #define THERM_STATUS_POWER_LIMIT (1 << 10) | ||
788 | |||
789 | #define MSR_THERM2_CTL 0x0000019d | ||
790 | |||
791 | #define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) | ||
792 | |||
793 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | ||
794 | |||
795 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 | ||
796 | |||
797 | #define MSR_MISC_FEATURE_CONTROL 0x000001a4 | ||
798 | #define MSR_MISC_PWR_MGMT 0x000001aa | ||
799 | |||
800 | #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 | ||
801 | #define ENERGY_PERF_BIAS_PERFORMANCE 0 | ||
802 | #define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 | ||
803 | #define ENERGY_PERF_BIAS_NORMAL 6 | ||
804 | #define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 | ||
805 | #define ENERGY_PERF_BIAS_POWERSAVE 15 | ||
806 | |||
807 | #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 | ||
808 | |||
809 | #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) | ||
810 | #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) | ||
811 | |||
812 | #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 | ||
813 | |||
814 | #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) | ||
815 | #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) | ||
816 | #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) | ||
817 | |||
818 | /* Thermal Thresholds Support */ | ||
819 | #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) | ||
820 | #define THERM_SHIFT_THRESHOLD0 8 | ||
821 | #define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) | ||
822 | #define THERM_INT_THRESHOLD1_ENABLE (1 << 23) | ||
823 | #define THERM_SHIFT_THRESHOLD1 16 | ||
824 | #define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) | ||
825 | #define THERM_STATUS_THRESHOLD0 (1 << 6) | ||
826 | #define THERM_LOG_THRESHOLD0 (1 << 7) | ||
827 | #define THERM_STATUS_THRESHOLD1 (1 << 8) | ||
828 | #define THERM_LOG_THRESHOLD1 (1 << 9) | ||
829 | |||
830 | /* MISC_ENABLE bits: architectural */ | ||
831 | #define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 | ||
832 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) | ||
833 | #define MSR_IA32_MISC_ENABLE_TCC_BIT 1 | ||
834 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) | ||
835 | #define MSR_IA32_MISC_ENABLE_EMON_BIT 7 | ||
836 | #define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) | ||
837 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 | ||
838 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) | ||
839 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 | ||
840 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) | ||
841 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 | ||
842 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) | ||
843 | #define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 | ||
844 | #define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) | ||
845 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 | ||
846 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) | ||
847 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 | ||
848 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) | ||
849 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 | ||
850 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) | ||
851 | |||
852 | /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ | ||
853 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 | ||
854 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) | ||
855 | #define MSR_IA32_MISC_ENABLE_TM1_BIT 3 | ||
856 | #define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) | ||
857 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 | ||
858 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) | ||
859 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 | ||
860 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) | ||
861 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 | ||
862 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) | ||
863 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 | ||
864 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) | ||
865 | #define MSR_IA32_MISC_ENABLE_FERR_BIT 10 | ||
866 | #define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) | ||
867 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 | ||
868 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) | ||
869 | #define MSR_IA32_MISC_ENABLE_TM2_BIT 13 | ||
870 | #define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) | ||
871 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 | ||
872 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) | ||
873 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 | ||
874 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) | ||
875 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 | ||
876 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) | ||
877 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 | ||
878 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) | ||
879 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 | ||
880 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) | ||
881 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 | ||
882 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) | ||
883 | |||
884 | /* MISC_FEATURES_ENABLES non-architectural features */ | ||
885 | #define MSR_MISC_FEATURES_ENABLES 0x00000140 | ||
886 | |||
887 | #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 | ||
888 | #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) | ||
889 | #define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 | ||
890 | |||
891 | #define MSR_IA32_TSC_DEADLINE 0x000006E0 | ||
892 | |||
893 | /* P4/Xeon+ specific */ | ||
894 | #define MSR_IA32_MCG_EAX 0x00000180 | ||
895 | #define MSR_IA32_MCG_EBX 0x00000181 | ||
896 | #define MSR_IA32_MCG_ECX 0x00000182 | ||
897 | #define MSR_IA32_MCG_EDX 0x00000183 | ||
898 | #define MSR_IA32_MCG_ESI 0x00000184 | ||
899 | #define MSR_IA32_MCG_EDI 0x00000185 | ||
900 | #define MSR_IA32_MCG_EBP 0x00000186 | ||
901 | #define MSR_IA32_MCG_ESP 0x00000187 | ||
902 | #define MSR_IA32_MCG_EFLAGS 0x00000188 | ||
903 | #define MSR_IA32_MCG_EIP 0x00000189 | ||
904 | #define MSR_IA32_MCG_RESERVED 0x0000018a | ||
905 | |||
906 | /* Pentium IV performance counter MSRs */ | ||
907 | #define MSR_P4_BPU_PERFCTR0 0x00000300 | ||
908 | #define MSR_P4_BPU_PERFCTR1 0x00000301 | ||
909 | #define MSR_P4_BPU_PERFCTR2 0x00000302 | ||
910 | #define MSR_P4_BPU_PERFCTR3 0x00000303 | ||
911 | #define MSR_P4_MS_PERFCTR0 0x00000304 | ||
912 | #define MSR_P4_MS_PERFCTR1 0x00000305 | ||
913 | #define MSR_P4_MS_PERFCTR2 0x00000306 | ||
914 | #define MSR_P4_MS_PERFCTR3 0x00000307 | ||
915 | #define MSR_P4_FLAME_PERFCTR0 0x00000308 | ||
916 | #define MSR_P4_FLAME_PERFCTR1 0x00000309 | ||
917 | #define MSR_P4_FLAME_PERFCTR2 0x0000030a | ||
918 | #define MSR_P4_FLAME_PERFCTR3 0x0000030b | ||
919 | #define MSR_P4_IQ_PERFCTR0 0x0000030c | ||
920 | #define MSR_P4_IQ_PERFCTR1 0x0000030d | ||
921 | #define MSR_P4_IQ_PERFCTR2 0x0000030e | ||
922 | #define MSR_P4_IQ_PERFCTR3 0x0000030f | ||
923 | #define MSR_P4_IQ_PERFCTR4 0x00000310 | ||
924 | #define MSR_P4_IQ_PERFCTR5 0x00000311 | ||
925 | #define MSR_P4_BPU_CCCR0 0x00000360 | ||
926 | #define MSR_P4_BPU_CCCR1 0x00000361 | ||
927 | #define MSR_P4_BPU_CCCR2 0x00000362 | ||
928 | #define MSR_P4_BPU_CCCR3 0x00000363 | ||
929 | #define MSR_P4_MS_CCCR0 0x00000364 | ||
930 | #define MSR_P4_MS_CCCR1 0x00000365 | ||
931 | #define MSR_P4_MS_CCCR2 0x00000366 | ||
932 | #define MSR_P4_MS_CCCR3 0x00000367 | ||
933 | #define MSR_P4_FLAME_CCCR0 0x00000368 | ||
934 | #define MSR_P4_FLAME_CCCR1 0x00000369 | ||
935 | #define MSR_P4_FLAME_CCCR2 0x0000036a | ||
936 | #define MSR_P4_FLAME_CCCR3 0x0000036b | ||
937 | #define MSR_P4_IQ_CCCR0 0x0000036c | ||
938 | #define MSR_P4_IQ_CCCR1 0x0000036d | ||
939 | #define MSR_P4_IQ_CCCR2 0x0000036e | ||
940 | #define MSR_P4_IQ_CCCR3 0x0000036f | ||
941 | #define MSR_P4_IQ_CCCR4 0x00000370 | ||
942 | #define MSR_P4_IQ_CCCR5 0x00000371 | ||
943 | #define MSR_P4_ALF_ESCR0 0x000003ca | ||
944 | #define MSR_P4_ALF_ESCR1 0x000003cb | ||
945 | #define MSR_P4_BPU_ESCR0 0x000003b2 | ||
946 | #define MSR_P4_BPU_ESCR1 0x000003b3 | ||
947 | #define MSR_P4_BSU_ESCR0 0x000003a0 | ||
948 | #define MSR_P4_BSU_ESCR1 0x000003a1 | ||
949 | #define MSR_P4_CRU_ESCR0 0x000003b8 | ||
950 | #define MSR_P4_CRU_ESCR1 0x000003b9 | ||
951 | #define MSR_P4_CRU_ESCR2 0x000003cc | ||
952 | #define MSR_P4_CRU_ESCR3 0x000003cd | ||
953 | #define MSR_P4_CRU_ESCR4 0x000003e0 | ||
954 | #define MSR_P4_CRU_ESCR5 0x000003e1 | ||
955 | #define MSR_P4_DAC_ESCR0 0x000003a8 | ||
956 | #define MSR_P4_DAC_ESCR1 0x000003a9 | ||
957 | #define MSR_P4_FIRM_ESCR0 0x000003a4 | ||
958 | #define MSR_P4_FIRM_ESCR1 0x000003a5 | ||
959 | #define MSR_P4_FLAME_ESCR0 0x000003a6 | ||
960 | #define MSR_P4_FLAME_ESCR1 0x000003a7 | ||
961 | #define MSR_P4_FSB_ESCR0 0x000003a2 | ||
962 | #define MSR_P4_FSB_ESCR1 0x000003a3 | ||
963 | #define MSR_P4_IQ_ESCR0 0x000003ba | ||
964 | #define MSR_P4_IQ_ESCR1 0x000003bb | ||
965 | #define MSR_P4_IS_ESCR0 0x000003b4 | ||
966 | #define MSR_P4_IS_ESCR1 0x000003b5 | ||
967 | #define MSR_P4_ITLB_ESCR0 0x000003b6 | ||
968 | #define MSR_P4_ITLB_ESCR1 0x000003b7 | ||
969 | #define MSR_P4_IX_ESCR0 0x000003c8 | ||
970 | #define MSR_P4_IX_ESCR1 0x000003c9 | ||
971 | #define MSR_P4_MOB_ESCR0 0x000003aa | ||
972 | #define MSR_P4_MOB_ESCR1 0x000003ab | ||
973 | #define MSR_P4_MS_ESCR0 0x000003c0 | ||
974 | #define MSR_P4_MS_ESCR1 0x000003c1 | ||
975 | #define MSR_P4_PMH_ESCR0 0x000003ac | ||
976 | #define MSR_P4_PMH_ESCR1 0x000003ad | ||
977 | #define MSR_P4_RAT_ESCR0 0x000003bc | ||
978 | #define MSR_P4_RAT_ESCR1 0x000003bd | ||
979 | #define MSR_P4_SAAT_ESCR0 0x000003ae | ||
980 | #define MSR_P4_SAAT_ESCR1 0x000003af | ||
981 | #define MSR_P4_SSU_ESCR0 0x000003be | ||
982 | #define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ | ||
983 | |||
984 | #define MSR_P4_TBPU_ESCR0 0x000003c2 | ||
985 | #define MSR_P4_TBPU_ESCR1 0x000003c3 | ||
986 | #define MSR_P4_TC_ESCR0 0x000003c4 | ||
987 | #define MSR_P4_TC_ESCR1 0x000003c5 | ||
988 | #define MSR_P4_U2L_ESCR0 0x000003b0 | ||
989 | #define MSR_P4_U2L_ESCR1 0x000003b1 | ||
990 | |||
991 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
992 | |||
993 | /* Intel Core-based CPU performance counters */ | ||
994 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | ||
995 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | ||
996 | #define MSR_CORE_PERF_FIXED_CTR2 0x0000030b | ||
997 | #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d | ||
998 | #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e | ||
999 | #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f | ||
1000 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 | ||
1001 | |||
1002 | /* Geode defined MSRs */ | ||
1003 | #define MSR_GEODE_BUSCONT_CONF0 0x00001900 | ||
1004 | |||
1005 | /* Intel VT MSRs */ | ||
1006 | #define MSR_IA32_VMX_BASIC 0x00000480 | ||
1007 | #define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 | ||
1008 | #define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 | ||
1009 | #define MSR_IA32_VMX_EXIT_CTLS 0x00000483 | ||
1010 | #define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 | ||
1011 | #define MSR_IA32_VMX_MISC 0x00000485 | ||
1012 | #define MSR_IA32_VMX_CR0_FIXED0 0x00000486 | ||
1013 | #define MSR_IA32_VMX_CR0_FIXED1 0x00000487 | ||
1014 | #define MSR_IA32_VMX_CR4_FIXED0 0x00000488 | ||
1015 | #define MSR_IA32_VMX_CR4_FIXED1 0x00000489 | ||
1016 | #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a | ||
1017 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b | ||
1018 | #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c | ||
1019 | #define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d | ||
1020 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e | ||
1021 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f | ||
1022 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 | ||
1023 | #define MSR_IA32_VMX_VMFUNC 0x00000491 | ||
1024 | |||
1025 | /* VMX_BASIC bits and bitmasks */ | ||
1026 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 | ||
1027 | #define VMX_BASIC_TRUE_CTLS (1ULL << 55) | ||
1028 | #define VMX_BASIC_64 0x0001000000000000LLU | ||
1029 | #define VMX_BASIC_MEM_TYPE_SHIFT 50 | ||
1030 | #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU | ||
1031 | #define VMX_BASIC_MEM_TYPE_WB 6LLU | ||
1032 | #define VMX_BASIC_INOUT 0x0040000000000000LLU | ||
1033 | |||
1034 | /* MSR_IA32_VMX_MISC bits */ | ||
1035 | #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) | ||
1036 | #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F | ||
1037 | | ||
1038 | /* AMD-V MSRs */ | ||
1039 | #define MSR_VM_CR 0xc0010114 | ||
1040 | #define MSR_VM_IGNNE 0xc0010115 | ||
1041 | #define MSR_VM_HSAVE_PA 0xc0010117 | ||
1042 | |||
1043 | #endif /* !SELFTEST_KVM_X86_H */ | ||
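The VMX_BASIC_* masks above carve up the 64-bit value of MSR_IA32_VMX_BASIC. As an illustrative sketch (not part of the patch), decoding a value obtained elsewhere, e.g. through the msr(4) interface, could look like the following; the 13-bit VMCS-size field at bits 44:32 is an assumption drawn from the SDM layout these masks encode:

	#include <stdint.h>
	#include <stdio.h>

	/* Illustration only: decode an MSR_IA32_VMX_BASIC value with the
	 * masks defined above. How "basic" is read is out of scope here. */
	static void decode_vmx_basic(uint64_t basic)
	{
		uint32_t vmcs_size = (basic >> VMX_BASIC_VMCS_SIZE_SHIFT) & 0x1fff;
		uint64_t mem_type = (basic & VMX_BASIC_MEM_TYPE_MASK)
				    >> VMX_BASIC_MEM_TYPE_SHIFT;

		printf("VMCS size: %u bytes\n", vmcs_size);
		printf("VMCS memory type write-back: %s\n",
		       mem_type == VMX_BASIC_MEM_TYPE_WB ? "yes" : "no");
		printf("TRUE_*_CTLS MSRs supported: %s\n",
		       (basic & VMX_BASIC_TRUE_CTLS) ? "yes" : "no");
	}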
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c new file mode 100644 index 000000000000..c9f5b7d4ce38 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/assert.c | |||
@@ -0,0 +1,87 @@ | |||
1 | /* | ||
2 | * tools/testing/selftests/kvm/lib/assert.c | ||
3 | * | ||
4 | * Copyright (C) 2018, Google LLC. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
7 | */ | ||
8 | |||
9 | #define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/ | ||
10 | |||
11 | #include "test_util.h" | ||
12 | |||
13 | #include <execinfo.h> | ||
14 | #include <sys/syscall.h> | ||
15 | |||
16 | /* Dumps the current stack trace to stderr. */ | ||
17 | static void __attribute__((noinline)) test_dump_stack(void); | ||
18 | static void test_dump_stack(void) | ||
19 | { | ||
20 | /* | ||
21 | * Build and run this command: | ||
22 | * | ||
23 | * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} \ | ||
24 | * | cat -n 1>&2 | ||
25 | * | ||
26 | * Note that the spacing is different and there's no newline. | ||
27 | */ | ||
28 | size_t i; | ||
29 | size_t n = 20; | ||
30 | void *stack[n]; | ||
31 | const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai"; | ||
32 | const char *pipeline = "|cat -n 1>&2"; | ||
33 | char cmd[strlen(addr2line) + strlen(pipeline) + | ||
34 | /* N bytes per addr * 2 digits per byte + 1 space per addr: */ | ||
35 | n * (((sizeof(void *)) * 2) + 1) + | ||
36 | /* Null terminator: */ | ||
37 | 1]; | ||
38 | char *c; | ||
39 | |||
40 | n = backtrace(stack, n); | ||
41 | c = &cmd[0]; | ||
42 | c += sprintf(c, "%s", addr2line); | ||
43 | /* | ||
44 | * Skip the first 3 frames: backtrace, test_dump_stack, and | ||
45 | * test_assert. We hope that backtrace isn't inlined and the other two | ||
46 | * we've declared noinline. | ||
47 | */ | ||
48 | for (i = 2; i < n; i++) | ||
49 | c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1); | ||
50 | c += sprintf(c, "%s", pipeline); | ||
51 | #pragma GCC diagnostic push | ||
52 | #pragma GCC diagnostic ignored "-Wunused-result" | ||
53 | system(cmd); | ||
54 | #pragma GCC diagnostic pop | ||
55 | } | ||
56 | |||
57 | static pid_t gettid(void) | ||
58 | { | ||
59 | return syscall(SYS_gettid); | ||
60 | } | ||
61 | |||
62 | void __attribute__((noinline)) | ||
63 | test_assert(bool exp, const char *exp_str, | ||
64 | const char *file, unsigned int line, const char *fmt, ...) | ||
65 | { | ||
66 | va_list ap; | ||
67 | |||
68 | if (!(exp)) { | ||
69 | va_start(ap, fmt); | ||
70 | |||
71 | fprintf(stderr, "==== Test Assertion Failure ====\n" | ||
72 | " %s:%u: %s\n" | ||
73 | " pid=%d tid=%d\n", | ||
74 | file, line, exp_str, getpid(), gettid()); | ||
75 | test_dump_stack(); | ||
76 | if (fmt) { | ||
77 | fputs(" ", stderr); | ||
78 | vfprintf(stderr, fmt, ap); | ||
79 | fputs("\n", stderr); | ||
80 | } | ||
81 | va_end(ap); | ||
82 | |||
83 | exit(254); | ||
84 | } | ||
85 | |||
86 | return; | ||
87 | } | ||
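Test code reaches test_assert() through the TEST_ASSERT() macro, presumably defined in include/test_util.h, which supplies the stringified expression plus __FILE__ and __LINE__, as every caller in this patch does. A minimal sketch of the calling pattern:

	int fd = open("/dev/kvm", O_RDONLY);

	/* On failure this prints the banner, file:line, the failed
	 * expression, pid/tid and a symbolized backtrace, then exits
	 * with status 254. */
	TEST_ASSERT(fd >= 0, "open /dev/kvm failed, fd: %i errno: %i",
		    fd, errno);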
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c new file mode 100644 index 000000000000..7ca1bb40c498 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util.c | |||
@@ -0,0 +1,1480 @@ | |||
1 | /* | ||
2 | * tools/testing/selftests/kvm/lib/kvm_util.c | ||
3 | * | ||
4 | * Copyright (C) 2018, Google LLC. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
7 | */ | ||
8 | |||
9 | #include "test_util.h" | ||
10 | #include "kvm_util.h" | ||
11 | #include "kvm_util_internal.h" | ||
12 | |||
13 | #include <assert.h> | ||
14 | #include <sys/mman.h> | ||
15 | #include <sys/types.h> | ||
16 | #include <sys/stat.h> | ||
17 | |||
18 | #define KVM_DEV_PATH "/dev/kvm" | ||
19 | |||
20 | #define KVM_UTIL_PGS_PER_HUGEPG 512 | ||
21 | #define KVM_UTIL_MIN_PADDR 0x2000 | ||
22 | |||
23 | /* Aligns x up to the next multiple of size. Size must be a power of 2. */ | ||
24 | static void *align(void *x, size_t size) | ||
25 | { | ||
26 | size_t mask = size - 1; | ||
27 | TEST_ASSERT(size != 0 && !(size & (size - 1)), | ||
28 | "size not a power of 2: %lu", size); | ||
29 | return (void *) (((size_t) x + mask) & ~mask); | ||
30 | } | ||
31 | |||
32 | /* Capability Check | ||
33 | * | ||
34 | * Input Args: | ||
35 | * cap - Capability | ||
36 | * | ||
37 | * Output Args: None | ||
38 | * | ||
39 | * Return: | ||
40 | * On success, the value corresponding to the capability (KVM_CAP_*) | ||
41 | * specified by the value of cap. On failure a TEST_ASSERT failure | ||
42 | * is produced. | ||
43 | * | ||
44 | * Looks up and returns the value corresponding to the capability | ||
45 | * (KVM_CAP_*) given by cap. | ||
46 | */ | ||
47 | int kvm_check_cap(long cap) | ||
48 | { | ||
49 | int ret; | ||
50 | int kvm_fd; | ||
51 | |||
52 | kvm_fd = open(KVM_DEV_PATH, O_RDONLY); | ||
53 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | ||
54 | KVM_DEV_PATH, kvm_fd, errno); | ||
55 | |||
56 | ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); | ||
57 | TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" | ||
58 | " rc: %i errno: %i", ret, errno); | ||
59 | |||
60 | close(kvm_fd); | ||
61 | |||
62 | return ret; | ||
63 | } | ||
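A test would typically probe an extension up front and bail out gracefully when it is absent; KVM_CAP_SYNC_MMU below is an arbitrary capability chosen purely for illustration:

	/* Skip (rather than fail) when the host KVM lacks the feature. */
	if (!kvm_check_cap(KVM_CAP_SYNC_MMU)) {
		fprintf(stderr, "KVM_CAP_SYNC_MMU unavailable, skipping\n");
		exit(0);
	}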
64 | |||
65 | /* VM Create | ||
66 | * | ||
67 | * Input Args: | ||
68 | * mode - VM Mode (e.g. VM_MODE_FLAT48PG) | ||
69 | * phy_pages - Number of physical memory pages to allocate and map | ||
70 | * perm - Permissions for the KVM device file (e.g. O_RDWR) | ||
71 | * | ||
72 | * Output Args: None | ||
73 | * | ||
74 | * Return: | ||
75 | * Pointer to opaque structure that describes the created VM. | ||
76 | * | ||
77 | * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). | ||
78 | * When phy_pages is non-zero, a memory region of phy_pages physical pages | ||
79 | * is created and mapped starting at guest physical address 0. The file | ||
80 | * descriptor to control the created VM is created with the permissions | ||
81 | * given by perm (e.g. O_RDWR). | ||
82 | */ | ||
83 | struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) | ||
84 | { | ||
85 | struct kvm_vm *vm; | ||
86 | int kvm_fd; | ||
87 | |||
88 | /* Allocate memory. */ | ||
89 | vm = calloc(1, sizeof(*vm)); | ||
90 | TEST_ASSERT(vm != NULL, "Insufficient Memory"); | ||
91 | |||
92 | vm->mode = mode; | ||
93 | kvm_fd = open(KVM_DEV_PATH, perm); | ||
94 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | ||
95 | KVM_DEV_PATH, kvm_fd, errno); | ||
96 | |||
97 | /* Create VM. */ | ||
98 | vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); | ||
99 | TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " | ||
100 | "rc: %i errno: %i", vm->fd, errno); | ||
101 | |||
102 | close(kvm_fd); | ||
103 | |||
104 | /* Setup mode specific traits. */ | ||
105 | switch (vm->mode) { | ||
106 | case VM_MODE_FLAT48PG: | ||
107 | vm->page_size = 0x1000; | ||
108 | vm->page_shift = 12; | ||
109 | |||
110 | /* Limit to 48-bit canonical virtual addresses. */ | ||
111 | vm->vpages_valid = sparsebit_alloc(); | ||
112 | sparsebit_set_num(vm->vpages_valid, | ||
113 | 0, (1ULL << (48 - 1)) >> vm->page_shift); | ||
114 | sparsebit_set_num(vm->vpages_valid, | ||
115 | (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, | ||
116 | (1ULL << (48 - 1)) >> vm->page_shift); | ||
117 | |||
118 | /* Limit physical addresses to 52-bits. */ | ||
119 | vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; | ||
120 | break; | ||
121 | |||
122 | default: | ||
123 | TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); | ||
124 | } | ||
125 | |||
126 | /* Allocate and setup memory for guest. */ | ||
127 | vm->vpages_mapped = sparsebit_alloc(); | ||
128 | if (phy_pages != 0) | ||
129 | vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, | ||
130 | 0, 0, phy_pages, 0); | ||
131 | |||
132 | return vm; | ||
133 | } | ||
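A typical lifecycle, sketched with an arbitrary 512-page guest memory size (kvm_vm_free() is defined further down in this file):

	struct kvm_vm *vm;

	/* 512 guest physical pages in slot 0, VM fd opened read/write. */
	vm = vm_create(VM_MODE_FLAT48PG, 512, O_RDWR);
	/* ... add VCPUs and extra memory regions, run the test ... */
	kvm_vm_free(vm);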
134 | |||
135 | /* Userspace Memory Region Find | ||
136 | * | ||
137 | * Input Args: | ||
138 | * vm - Virtual Machine | ||
139 | * start - Starting VM physical address | ||
140 | * end - Ending VM physical address, inclusive. | ||
141 | * | ||
142 | * Output Args: None | ||
143 | * | ||
144 | * Return: | ||
145 | * Pointer to overlapping region, NULL if no such region. | ||
146 | * | ||
147 | * Searches for a region with any physical memory that overlaps with | ||
148 | * any portion of the guest physical addresses from start to end | ||
149 | * inclusive. If multiple overlapping regions exist, a pointer to any | ||
150 | * of the regions is returned. Null is returned only when no overlapping | ||
151 | * region exists. | ||
152 | */ | ||
153 | static struct userspace_mem_region *userspace_mem_region_find( | ||
154 | struct kvm_vm *vm, uint64_t start, uint64_t end) | ||
155 | { | ||
156 | struct userspace_mem_region *region; | ||
157 | |||
158 | for (region = vm->userspace_mem_region_head; region; | ||
159 | region = region->next) { | ||
160 | uint64_t existing_start = region->region.guest_phys_addr; | ||
161 | uint64_t existing_end = region->region.guest_phys_addr | ||
162 | + region->region.memory_size - 1; | ||
163 | if (start <= existing_end && end >= existing_start) | ||
164 | return region; | ||
165 | } | ||
166 | |||
167 | return NULL; | ||
168 | } | ||
169 | |||
170 | /* KVM Userspace Memory Region Find | ||
171 | * | ||
172 | * Input Args: | ||
173 | * vm - Virtual Machine | ||
174 | * start - Starting VM physical address | ||
175 | * end - Ending VM physical address, inclusive. | ||
176 | * | ||
177 | * Output Args: None | ||
178 | * | ||
179 | * Return: | ||
180 | * Pointer to overlapping region, NULL if no such region. | ||
181 | * | ||
182 | * Public interface to userspace_mem_region_find. Allows tests to look up | ||
183 | * the memslot data structure for a given range of guest physical memory. | ||
184 | */ | ||
185 | struct kvm_userspace_memory_region * | ||
186 | kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, | ||
187 | uint64_t end) | ||
188 | { | ||
189 | struct userspace_mem_region *region; | ||
190 | |||
191 | region = userspace_mem_region_find(vm, start, end); | ||
192 | if (!region) | ||
193 | return NULL; | ||
194 | |||
195 | return ®ion->region; | ||
196 | } | ||
197 | |||
198 | /* VCPU Find | ||
199 | * | ||
200 | * Input Args: | ||
201 | * vm - Virtual Machine | ||
202 | * vcpuid - VCPU ID | ||
203 | * | ||
204 | * Output Args: None | ||
205 | * | ||
206 | * Return: | ||
207 | * Pointer to VCPU structure | ||
208 | * | ||
209 | * Locates a vcpu structure that describes the VCPU specified by vcpuid and | ||
210 | * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU | ||
211 | * for the specified vcpuid. | ||
212 | */ | ||
213 | struct vcpu *vcpu_find(struct kvm_vm *vm, | ||
214 | uint32_t vcpuid) | ||
215 | { | ||
216 | struct vcpu *vcpup; | ||
217 | |||
218 | for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) { | ||
219 | if (vcpup->id == vcpuid) | ||
220 | return vcpup; | ||
221 | } | ||
222 | |||
223 | return NULL; | ||
224 | } | ||
225 | |||
226 | /* VM VCPU Remove | ||
227 | * | ||
228 | * Input Args: | ||
229 | * vm - Virtual Machine | ||
230 | * vcpuid - VCPU ID | ||
231 | * | ||
232 | * Output Args: None | ||
233 | * | ||
234 | * Return: None, TEST_ASSERT failures for all error conditions | ||
235 | * | ||
236 | * Within the VM specified by vm, removes the VCPU given by vcpuid. | ||
237 | */ | ||
238 | static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) | ||
239 | { | ||
240 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
241 | |||
242 | int ret = close(vcpu->fd); | ||
243 | TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " | ||
244 | "errno: %i", ret, errno); | ||
245 | |||
246 | if (vcpu->next) | ||
247 | vcpu->next->prev = vcpu->prev; | ||
248 | if (vcpu->prev) | ||
249 | vcpu->prev->next = vcpu->next; | ||
250 | else | ||
251 | vm->vcpu_head = vcpu->next; | ||
252 | free(vcpu); | ||
253 | } | ||
254 | |||
255 | |||
256 | /* Destroys and frees the VM pointed to by vmp. | ||
257 | */ | ||
258 | void kvm_vm_free(struct kvm_vm *vmp) | ||
259 | { | ||
260 | int ret; | ||
261 | |||
262 | if (vmp == NULL) | ||
263 | return; | ||
264 | |||
265 | /* Free userspace_mem_regions. */ | ||
266 | while (vmp->userspace_mem_region_head) { | ||
267 | struct userspace_mem_region *region | ||
268 | = vmp->userspace_mem_region_head; | ||
269 | |||
270 | region->region.memory_size = 0; | ||
271 | ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, | ||
272 | ®ion->region); | ||
273 | TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, " | ||
274 | "rc: %i errno: %i", ret, errno); | ||
275 | |||
276 | vmp->userspace_mem_region_head = region->next; | ||
277 | sparsebit_free(®ion->unused_phy_pages); | ||
278 | ret = munmap(region->mmap_start, region->mmap_size); | ||
279 | TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", | ||
280 | ret, errno); | ||
281 | |||
282 | free(region); | ||
283 | } | ||
284 | |||
285 | /* Free VCPUs. */ | ||
286 | while (vmp->vcpu_head) | ||
287 | vm_vcpu_rm(vmp, vmp->vcpu_head->id); | ||
288 | |||
289 | /* Free sparsebit arrays. */ | ||
290 | sparsebit_free(&vmp->vpages_valid); | ||
291 | sparsebit_free(&vmp->vpages_mapped); | ||
292 | |||
293 | /* Close file descriptor for the VM. */ | ||
294 | ret = close(vmp->fd); | ||
295 | TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" | ||
296 | " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); | ||
297 | |||
298 | /* Free the structure describing the VM. */ | ||
299 | free(vmp); | ||
300 | } | ||
301 | |||
302 | /* Memory Compare, host virtual to guest virtual | ||
303 | * | ||
304 | * Input Args: | ||
305 | * hva - Starting host virtual address | ||
306 | * vm - Virtual Machine | ||
307 | * gva - Starting guest virtual address | ||
308 | * len - number of bytes to compare | ||
309 | * | ||
310 | * Output Args: None | ||
311 | * | ||
312 | * Input/Output Args: None | ||
313 | * | ||
314 | * Return: | ||
315 | * Returns 0 if the bytes starting at hva for a length of len | ||
316 | * are equal to the guest virtual bytes starting at gva. Returns | ||
317 | * a value < 0, if bytes at hva are less than those at gva. | ||
318 | * Otherwise a value > 0 is returned. | ||
319 | * | ||
320 | * Compares the bytes starting at the host virtual address hva, for | ||
321 | * a length of len, to the guest bytes starting at the guest virtual | ||
322 | * address given by gva. | ||
323 | */ | ||
324 | int kvm_memcmp_hva_gva(void *hva, | ||
325 | struct kvm_vm *vm, vm_vaddr_t gva, size_t len) | ||
326 | { | ||
327 | size_t amt; | ||
328 | |||
329 | /* Compare a batch of bytes until either a mismatch is found | ||
330 | * or all the bytes have been compared. | ||
331 | */ | ||
332 | for (uintptr_t offset = 0; offset < len; offset += amt) { | ||
333 | uintptr_t ptr1 = (uintptr_t)hva + offset; | ||
334 | |||
335 | /* Determine host address for guest virtual address | ||
336 | * at offset. | ||
337 | */ | ||
338 | uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); | ||
339 | |||
340 | /* Determine amount to compare on this pass. | ||
341 | * Don't allow the comparison to cross a page boundary. | ||
342 | */ | ||
343 | amt = len - offset; | ||
344 | if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) | ||
345 | amt = vm->page_size - (ptr1 % vm->page_size); | ||
346 | if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) | ||
347 | amt = vm->page_size - (ptr2 % vm->page_size); | ||
348 | |||
349 | assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); | ||
350 | assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); | ||
351 | |||
352 | /* Perform the comparison. If there is a difference | ||
353 | * return that result to the caller, otherwise need | ||
354 | * to continue on looking for a mismatch. | ||
355 | */ | ||
356 | int ret = memcmp((void *)ptr1, (void *)ptr2, amt); | ||
357 | if (ret != 0) | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | /* No mismatch found. Let the caller know the two memory | ||
362 | * areas are equal. | ||
363 | */ | ||
364 | return 0; | ||
365 | } | ||
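The usual consumer is a test that wrote a reference pattern into guest memory and wants to verify it afterwards. A sketch, with an arbitrary minimum virtual address and memslot 0 used for both data and page tables:

	uint8_t pattern[64];
	vm_vaddr_t gva = vm_vaddr_alloc(vm, sizeof(pattern), 0x10000, 0, 0);

	/* Fill the guest page through its host mapping, then compare. */
	memset(pattern, 0xaa, sizeof(pattern));
	memcpy(addr_gva2hva(vm, gva), pattern, sizeof(pattern));
	TEST_ASSERT(!kvm_memcmp_hva_gva(pattern, vm, gva, sizeof(pattern)),
		    "Guest memory does not match the reference pattern");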
366 | |||
367 | /* Allocate an instance of struct kvm_cpuid2 | ||
368 | * | ||
369 | * Input Args: None | ||
370 | * | ||
371 | * Output Args: None | ||
372 | * | ||
373 | * Return: A pointer to the allocated struct. The caller is responsible | ||
374 | * for freeing this struct. | ||
375 | * | ||
376 | * Since kvm_cpuid2 uses a 0-length array to allow the size of the | ||
377 | * array to be decided at allocation time, allocation is slightly | ||
378 | * complicated. This function uses a reasonable default length for | ||
379 | * the array and performs the appropriate allocation. | ||
380 | */ | ||
381 | struct kvm_cpuid2 *allocate_kvm_cpuid2(void) | ||
382 | { | ||
383 | struct kvm_cpuid2 *cpuid; | ||
384 | int nent = 100; | ||
385 | size_t size; | ||
386 | |||
387 | size = sizeof(*cpuid); | ||
388 | size += nent * sizeof(struct kvm_cpuid_entry2); | ||
389 | cpuid = malloc(size); | ||
390 | if (!cpuid) { | ||
391 | perror("malloc"); | ||
392 | abort(); | ||
393 | } | ||
394 | |||
395 | cpuid->nent = nent; | ||
396 | |||
397 | return cpuid; | ||
398 | } | ||
399 | |||
400 | /* KVM Supported CPUID Get | ||
401 | * | ||
402 | * Input Args: None | ||
403 | * | ||
404 | * Output Args: | ||
405 | * cpuid - The supported KVM CPUID | ||
406 | * | ||
407 | * Return: void | ||
408 | * | ||
409 | * Get the guest CPUID supported by KVM. | ||
410 | */ | ||
411 | void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) | ||
412 | { | ||
413 | int ret; | ||
414 | int kvm_fd; | ||
415 | |||
416 | kvm_fd = open(KVM_DEV_PATH, O_RDONLY); | ||
417 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | ||
418 | KVM_DEV_PATH, kvm_fd, errno); | ||
419 | |||
420 | ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); | ||
421 | TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", | ||
422 | ret, errno); | ||
423 | |||
424 | close(kvm_fd); | ||
425 | } | ||
426 | |||
427 | /* Locate a cpuid entry. | ||
428 | * | ||
429 | * Input Args: | ||
430 | * cpuid: The cpuid. | ||
431 | * function, index: The function/index pair of the cpuid entry to find. | ||
432 | * | ||
433 | * Output Args: None | ||
434 | * | ||
435 | * Return: A pointer to the cpuid entry. Never returns NULL. | ||
436 | */ | ||
437 | struct kvm_cpuid_entry2 * | ||
438 | find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, | ||
439 | uint32_t index) | ||
440 | { | ||
441 | struct kvm_cpuid_entry2 *entry = NULL; | ||
442 | int i; | ||
443 | |||
444 | for (i = 0; i < cpuid->nent; i++) { | ||
445 | if (cpuid->entries[i].function == function && | ||
446 | cpuid->entries[i].index == index) { | ||
447 | entry = &cpuid->entries[i]; | ||
448 | break; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", | ||
453 | function, index); | ||
454 | return entry; | ||
455 | } | ||
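The three CPUID helpers are designed to be chained: allocate, fill, then look up an entry. For example, checking whether KVM exposes XSAVE to guests (CPUID.1:ECX bit 26, a leaf/bit chosen purely for illustration):

	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2();
	struct kvm_cpuid_entry2 *entry;

	kvm_get_supported_cpuid(cpuid);
	entry = find_cpuid_index_entry(cpuid, 0x1, 0);
	if (entry->ecx & (1u << 26))
		printf("KVM exposes XSAVE to guests\n");
	free(cpuid);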
456 | |||
457 | /* VM Userspace Memory Region Add | ||
458 | * | ||
459 | * Input Args: | ||
460 | * vm - Virtual Machine | ||
461 | * src_type - Storage source for this region | ||
462 | * (e.g. VM_MEM_SRC_ANONYMOUS) | ||
463 | * guest_paddr - Starting guest physical address | ||
464 | * slot - KVM region slot | ||
465 | * npages - Number of physical pages | ||
466 | * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) | ||
467 | * | ||
468 | * Output Args: None | ||
469 | * | ||
470 | * Return: None | ||
471 | * | ||
472 | * Allocates a memory area of the number of pages specified by npages | ||
473 | * and maps it to the VM specified by vm, at a starting physical address | ||
474 | * given by guest_paddr. The region is created with a KVM region slot | ||
475 | * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The | ||
476 | * region is created with the flags given by flags. | ||
477 | */ | ||
478 | void vm_userspace_mem_region_add(struct kvm_vm *vm, | ||
479 | enum vm_mem_backing_src_type src_type, | ||
480 | uint64_t guest_paddr, uint32_t slot, uint64_t npages, | ||
481 | uint32_t flags) | ||
482 | { | ||
483 | int ret; | ||
484 | unsigned long pmem_size = 0; | ||
485 | struct userspace_mem_region *region; | ||
486 | size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; | ||
487 | |||
488 | TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " | ||
489 | "address not on a page boundary.\n" | ||
490 | " guest_paddr: 0x%lx vm->page_size: 0x%x", | ||
491 | guest_paddr, vm->page_size); | ||
492 | TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) | ||
493 | <= vm->max_gfn, "Physical range beyond maximum " | ||
494 | "supported physical address,\n" | ||
495 | " guest_paddr: 0x%lx npages: 0x%lx\n" | ||
496 | " vm->max_gfn: 0x%lx vm->page_size: 0x%x", | ||
497 | guest_paddr, npages, vm->max_gfn, vm->page_size); | ||
498 | |||
499 | /* Confirm a mem region with an overlapping address doesn't | ||
500 | * already exist. | ||
501 | */ | ||
502 | region = (struct userspace_mem_region *) userspace_mem_region_find( | ||
503 | vm, guest_paddr, guest_paddr + npages * vm->page_size); | ||
504 | if (region != NULL) | ||
505 | TEST_ASSERT(false, "overlapping userspace_mem_region already " | ||
506 | "exists\n" | ||
507 | " requested guest_paddr: 0x%lx npages: 0x%lx " | ||
508 | "page_size: 0x%x\n" | ||
509 | " existing guest_paddr: 0x%lx size: 0x%lx", | ||
510 | guest_paddr, npages, vm->page_size, | ||
511 | (uint64_t) region->region.guest_phys_addr, | ||
512 | (uint64_t) region->region.memory_size); | ||
513 | |||
514 | /* Confirm no region with the requested slot already exists. */ | ||
515 | for (region = vm->userspace_mem_region_head; region; | ||
516 | region = region->next) { | ||
517 | if (region->region.slot == slot) | ||
518 | break; | ||
519 | if ((guest_paddr <= (region->region.guest_phys_addr | ||
520 | + region->region.memory_size)) | ||
521 | && ((guest_paddr + npages * vm->page_size) | ||
522 | >= region->region.guest_phys_addr)) | ||
523 | break; | ||
524 | } | ||
525 | if (region != NULL) | ||
526 | TEST_ASSERT(false, "A mem region with the requested slot " | ||
527 | "or overlapping physical memory range already exists.\n" | ||
528 | " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" | ||
529 | " existing slot: %u paddr: 0x%lx size: 0x%lx", | ||
530 | slot, guest_paddr, npages, | ||
531 | region->region.slot, | ||
532 | (uint64_t) region->region.guest_phys_addr, | ||
533 | (uint64_t) region->region.memory_size); | ||
534 | |||
535 | /* Allocate and initialize new mem region structure. */ | ||
536 | region = calloc(1, sizeof(*region)); | ||
537 | TEST_ASSERT(region != NULL, "Insufficient Memory"); | ||
538 | region->mmap_size = npages * vm->page_size; | ||
539 | |||
540 | /* Enough memory to align up to a huge page. */ | ||
541 | if (src_type == VM_MEM_SRC_ANONYMOUS_THP) | ||
542 | region->mmap_size += huge_page_size; | ||
543 | region->mmap_start = mmap(NULL, region->mmap_size, | ||
544 | PROT_READ | PROT_WRITE, | ||
545 | MAP_PRIVATE | MAP_ANONYMOUS | ||
546 | | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), | ||
547 | -1, 0); | ||
548 | TEST_ASSERT(region->mmap_start != MAP_FAILED, | ||
549 | "test_malloc failed, mmap_start: %p errno: %i", | ||
550 | region->mmap_start, errno); | ||
551 | |||
552 | /* Align THP allocation up to start of a huge page. */ | ||
553 | region->host_mem = align(region->mmap_start, | ||
554 | src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); | ||
555 | |||
556 | /* As needed perform madvise */ | ||
557 | if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { | ||
558 | ret = madvise(region->host_mem, npages * vm->page_size, | ||
559 | src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); | ||
560 | TEST_ASSERT(ret == 0, "madvise failed,\n" | ||
561 | " addr: %p\n" | ||
562 | " length: 0x%lx\n" | ||
563 | " src_type: %x", | ||
564 | region->host_mem, npages * vm->page_size, src_type); | ||
565 | } | ||
566 | |||
567 | region->unused_phy_pages = sparsebit_alloc(); | ||
568 | sparsebit_set_num(region->unused_phy_pages, | ||
569 | guest_paddr >> vm->page_shift, npages); | ||
570 | region->region.slot = slot; | ||
571 | region->region.flags = flags; | ||
572 | region->region.guest_phys_addr = guest_paddr; | ||
573 | region->region.memory_size = npages * vm->page_size; | ||
574 | region->region.userspace_addr = (uintptr_t) region->host_mem; | ||
575 | ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); | ||
576 | TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" | ||
577 | " rc: %i errno: %i\n" | ||
578 | " slot: %u flags: 0x%x\n" | ||
579 | " guest_phys_addr: 0x%lx size: 0x%lx", | ||
580 | ret, errno, slot, flags, | ||
581 | guest_paddr, (uint64_t) region->region.memory_size); | ||
582 | |||
583 | /* Add to linked-list of memory regions. */ | ||
584 | if (vm->userspace_mem_region_head) | ||
585 | vm->userspace_mem_region_head->prev = region; | ||
586 | region->next = vm->userspace_mem_region_head; | ||
587 | vm->userspace_mem_region_head = region; | ||
588 | } | ||
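For instance, a test that wants a second, THP-backed slot at guest physical 1 GiB might do the following; the slot number, size and address are arbitrary:

	/* 256 pages in slot 1, starting at 1 GiB, no dirty logging. */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
				    1ULL << 30, 1, 256, 0);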
589 | |||
590 | /* Memslot to region | ||
591 | * | ||
592 | * Input Args: | ||
593 | * vm - Virtual Machine | ||
594 | * memslot - KVM memory slot ID | ||
595 | * | ||
596 | * Output Args: None | ||
597 | * | ||
598 | * Return: | ||
599 | * Pointer to memory region structure that describes the memory region | ||
600 | * using kvm memory slot ID given by memslot. TEST_ASSERT failure | ||
601 | * on error (e.g. currently no memory region using memslot as a KVM | ||
602 | * memory slot ID). | ||
603 | */ | ||
604 | static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, | ||
605 | uint32_t memslot) | ||
606 | { | ||
607 | struct userspace_mem_region *region; | ||
608 | |||
609 | for (region = vm->userspace_mem_region_head; region; | ||
610 | region = region->next) { | ||
611 | if (region->region.slot == memslot) | ||
612 | break; | ||
613 | } | ||
614 | if (region == NULL) { | ||
615 | fprintf(stderr, "No mem region with the requested slot found,\n" | ||
616 | " requested slot: %u\n", memslot); | ||
617 | fputs("---- vm dump ----\n", stderr); | ||
618 | vm_dump(stderr, vm, 2); | ||
619 | TEST_ASSERT(false, "Mem region not found"); | ||
620 | } | ||
621 | |||
622 | return region; | ||
623 | } | ||
624 | |||
625 | /* VM Memory Region Flags Set | ||
626 | * | ||
627 | * Input Args: | ||
628 | * vm - Virtual Machine | ||
629 | * slot - KVM memory slot ID; flags - Flags to set on the region | ||
630 | * | ||
631 | * Output Args: None | ||
632 | * | ||
633 | * Return: None | ||
634 | * | ||
635 | * Sets the flags of the memory region specified by the value of slot, | ||
636 | * to the values given by flags. | ||
637 | */ | ||
638 | void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) | ||
639 | { | ||
640 | int ret; | ||
641 | struct userspace_mem_region *region; | ||
642 | |||
643 | /* Locate memory region. */ | ||
644 | region = memslot2region(vm, slot); | ||
645 | |||
646 | region->region.flags = flags; | ||
647 | |||
648 | ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); | ||
649 | |||
650 | TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" | ||
651 | " rc: %i errno: %i slot: %u flags: 0x%x", | ||
652 | ret, errno, slot, flags); | ||
653 | } | ||
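The expected use is toggling dirty logging on an existing slot, roughly:

	/* Enable dirty-page tracking on slot 1, exercise the guest, then
	 * turn it back off. The dirty bitmap itself would be fetched with
	 * the KVM_GET_DIRTY_LOG ioctl. */
	vm_mem_region_set_flags(vm, 1, KVM_MEM_LOG_DIRTY_PAGES);
	/* ... run guest ... */
	vm_mem_region_set_flags(vm, 1, 0);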
654 | |||
655 | /* VCPU mmap Size | ||
656 | * | ||
657 | * Input Args: None | ||
658 | * | ||
659 | * Output Args: None | ||
660 | * | ||
661 | * Return: | ||
662 | * Size of VCPU state | ||
663 | * | ||
664 | * Returns the size of the structure pointed to by the return value | ||
665 | * of vcpu_state(). | ||
666 | */ | ||
667 | static int vcpu_mmap_sz(void) | ||
668 | { | ||
669 | int dev_fd, ret; | ||
670 | |||
671 | dev_fd = open(KVM_DEV_PATH, O_RDONLY); | ||
672 | TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", | ||
673 | __func__, KVM_DEV_PATH, dev_fd, errno); | ||
674 | |||
675 | ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); | ||
676 | TEST_ASSERT(ret >= sizeof(struct kvm_run), | ||
677 | "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i", | ||
678 | __func__, ret, errno); | ||
679 | |||
680 | close(dev_fd); | ||
681 | |||
682 | return ret; | ||
683 | } | ||
684 | |||
685 | /* VM VCPU Add | ||
686 | * | ||
687 | * Input Args: | ||
688 | * vm - Virtual Machine | ||
689 | * vcpuid - VCPU ID | ||
690 | * | ||
691 | * Output Args: None | ||
692 | * | ||
693 | * Return: None | ||
694 | * | ||
695 | * Creates a virtual CPU with the ID given by vcpuid and adds it to | ||
696 | * the VM specified by vm. | ||
697 | */ | ||
698 | void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) | ||
699 | { | ||
700 | struct vcpu *vcpu; | ||
701 | |||
702 | /* Confirm a vcpu with the specified id doesn't already exist. */ | ||
703 | vcpu = vcpu_find(vm, vcpuid); | ||
704 | if (vcpu != NULL) | ||
705 | TEST_ASSERT(false, "vcpu with the specified id " | ||
706 | "already exists,\n" | ||
707 | " requested vcpuid: %u\n" | ||
708 | " existing vcpuid: %u state: %p", | ||
709 | vcpuid, vcpu->id, vcpu->state); | ||
710 | |||
711 | /* Allocate and initialize new vcpu structure. */ | ||
712 | vcpu = calloc(1, sizeof(*vcpu)); | ||
713 | TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); | ||
714 | vcpu->id = vcpuid; | ||
715 | vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid); | ||
716 | TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i", | ||
717 | vcpu->fd, errno); | ||
718 | |||
719 | TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " | ||
720 | "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", | ||
721 | vcpu_mmap_sz(), sizeof(*vcpu->state)); | ||
722 | vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), | ||
723 | PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); | ||
724 | TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " | ||
725 | "vcpu id: %u errno: %i", vcpuid, errno); | ||
726 | |||
727 | /* Add to linked-list of VCPUs. */ | ||
728 | if (vm->vcpu_head) | ||
729 | vm->vcpu_head->prev = vcpu; | ||
730 | vcpu->next = vm->vcpu_head; | ||
731 | vm->vcpu_head = vcpu; | ||
732 | |||
733 | vcpu_setup(vm, vcpuid); | ||
734 | } | ||
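Together with vm_create() this is the minimal setup before VCPU state can be touched; vcpu_setup() is the per-arch hook (lib/x86.c for x86_64) that leaves the new VCPU in a runnable mode. A sketch, with an arbitrary VCPU ID:

	#define VCPU_ID 5	/* arbitrary */

	vm_vcpu_add(vm, VCPU_ID);
	/* The kvm_run communication area is now mapped. */
	struct kvm_run *run = vcpu_state(vm, VCPU_ID);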
735 | |||
736 | /* VM Virtual Address Unused Gap | ||
737 | * | ||
738 | * Input Args: | ||
739 | * vm - Virtual Machine | ||
740 | * sz - Size (bytes) | ||
741 | * vaddr_min - Minimum Virtual Address | ||
742 | * | ||
743 | * Output Args: None | ||
744 | * | ||
745 | * Return: | ||
746 | * Lowest virtual address at or above vaddr_min, with at least | ||
747 | * sz unused bytes. TEST_ASSERT failure if no area of at least | ||
748 | * size sz is available. | ||
749 | * | ||
750 | * Within the VM specified by vm, locates the lowest starting virtual | ||
751 | * address >= vaddr_min, that has at least sz unallocated bytes. A | ||
752 | * TEST_ASSERT failure occurs for invalid input or no area of at least | ||
753 | * sz unallocated bytes >= vaddr_min is available. | ||
754 | */ | ||
755 | static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, | ||
756 | vm_vaddr_t vaddr_min) | ||
757 | { | ||
758 | uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; | ||
759 | |||
760 | /* Determine lowest permitted virtual page index. */ | ||
761 | uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; | ||
762 | if ((pgidx_start * vm->page_size) < vaddr_min) | ||
763 | goto no_va_found; | ||
764 | |||
765 | /* Loop over section with enough valid virtual page indexes. */ | ||
766 | if (!sparsebit_is_set_num(vm->vpages_valid, | ||
767 | pgidx_start, pages)) | ||
768 | pgidx_start = sparsebit_next_set_num(vm->vpages_valid, | ||
769 | pgidx_start, pages); | ||
770 | do { | ||
771 | /* | ||
772 | * Are there enough unused virtual pages available at | ||
773 | * the currently proposed starting virtual page index. | ||
774 | * If not, adjust proposed starting index to next | ||
775 | * possible. | ||
776 | */ | ||
777 | if (sparsebit_is_clear_num(vm->vpages_mapped, | ||
778 | pgidx_start, pages)) | ||
779 | goto va_found; | ||
780 | pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, | ||
781 | pgidx_start, pages); | ||
782 | if (pgidx_start == 0) | ||
783 | goto no_va_found; | ||
784 | |||
785 | /* | ||
786 | * If needed, adjust proposed starting virtual address, | ||
787 | * to next range of valid virtual addresses. | ||
788 | */ | ||
789 | if (!sparsebit_is_set_num(vm->vpages_valid, | ||
790 | pgidx_start, pages)) { | ||
791 | pgidx_start = sparsebit_next_set_num( | ||
792 | vm->vpages_valid, pgidx_start, pages); | ||
793 | if (pgidx_start == 0) | ||
794 | goto no_va_found; | ||
795 | } | ||
796 | } while (pgidx_start != 0); | ||
797 | |||
798 | no_va_found: | ||
799 | TEST_ASSERT(false, "No vaddr of specified pages available, " | ||
800 | "pages: 0x%lx", pages); | ||
801 | |||
802 | /* NOT REACHED */ | ||
803 | return -1; | ||
804 | |||
805 | va_found: | ||
806 | TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, | ||
807 | pgidx_start, pages), | ||
808 | "Unexpected, invalid virtual page index range,\n" | ||
809 | " pgidx_start: 0x%lx\n" | ||
810 | " pages: 0x%lx", | ||
811 | pgidx_start, pages); | ||
812 | TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, | ||
813 | pgidx_start, pages), | ||
814 | "Unexpected, pages already mapped,\n" | ||
815 | " pgidx_start: 0x%lx\n" | ||
816 | " pages: 0x%lx", | ||
817 | pgidx_start, pages); | ||
818 | |||
819 | return pgidx_start * vm->page_size; | ||
820 | } | ||
821 | |||
822 | /* VM Virtual Address Allocate | ||
823 | * | ||
824 | * Input Args: | ||
825 | * vm - Virtual Machine | ||
826 | * sz - Size in bytes | ||
827 | * vaddr_min - Minimum starting virtual address | ||
828 | * data_memslot - Memory region slot for data pages | ||
829 | * pgd_memslot - Memory region slot for new virtual translation tables | ||
830 | * | ||
831 | * Output Args: None | ||
832 | * | ||
833 | * Return: | ||
834 | * Starting guest virtual address | ||
835 | * | ||
836 | * Allocates at least sz bytes within the virtual address space of the vm | ||
837 | * given by vm. The allocated bytes are mapped to a virtual address >= | ||
838 | * the address given by vaddr_min. Note that each allocation uses | ||
839 | * a unique set of pages, with the minimum real allocation being at least | ||
840 | * a page. | ||
841 | */ | ||
842 | vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, | ||
843 | uint32_t data_memslot, uint32_t pgd_memslot) | ||
844 | { | ||
845 | uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); | ||
846 | |||
847 | virt_pgd_alloc(vm, pgd_memslot); | ||
848 | |||
849 | /* Find an unused range of virtual page addresses of at least | ||
850 | * pages in length. | ||
851 | */ | ||
852 | vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); | ||
853 | |||
854 | /* Map the virtual pages. */ | ||
855 | for (vm_vaddr_t vaddr = vaddr_start; pages > 0; | ||
856 | pages--, vaddr += vm->page_size) { | ||
857 | vm_paddr_t paddr; | ||
858 | |||
859 | paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); | ||
860 | |||
861 | virt_pg_map(vm, vaddr, paddr, pgd_memslot); | ||
862 | |||
863 | sparsebit_set(vm->vpages_mapped, | ||
864 | vaddr >> vm->page_shift); | ||
865 | } | ||
866 | |||
867 | return vaddr_start; | ||
868 | } | ||
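Because allocations are rounded up to whole pages and never share them, two small back-to-back allocations land on distinct pages; a sketch with arbitrary sizes, minimum address and memslots:

	/* Each call consumes at least one full page despite the 16-byte
	 * request, so the two addresses can never collide. */
	vm_vaddr_t a = vm_vaddr_alloc(vm, 16, 0x10000, 0, 0);
	vm_vaddr_t b = vm_vaddr_alloc(vm, 16, 0x10000, 0, 0);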
869 | |||
870 | /* Address VM Physical to Host Virtual | ||
871 | * | ||
872 | * Input Args: | ||
873 | * vm - Virtual Machine | ||
874 | * gpa - VM physical address | ||
875 | * | ||
876 | * Output Args: None | ||
877 | * | ||
878 | * Return: | ||
879 | * Equivalent host virtual address | ||
880 | * | ||
881 | * Locates the memory region containing the VM physical address given | ||
882 | * by gpa, within the VM given by vm. When found, the host virtual | ||
883 | * address providing the memory to the vm physical address is returned. | ||
884 | * A TEST_ASSERT failure occurs if no region containing gpa exists. | ||
885 | */ | ||
886 | void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) | ||
887 | { | ||
888 | struct userspace_mem_region *region; | ||
889 | for (region = vm->userspace_mem_region_head; region; | ||
890 | region = region->next) { | ||
891 | if ((gpa >= region->region.guest_phys_addr) | ||
892 | && (gpa <= (region->region.guest_phys_addr | ||
893 | + region->region.memory_size - 1))) | ||
894 | return (void *) ((uintptr_t) region->host_mem | ||
895 | + (gpa - region->region.guest_phys_addr)); | ||
896 | } | ||
897 | |||
898 | TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa); | ||
899 | return NULL; | ||
900 | } | ||
901 | |||
902 | /* Address Host Virtual to VM Physical | ||
903 | * | ||
904 | * Input Args: | ||
905 | * vm - Virtual Machine | ||
906 | * hva - Host virtual address | ||
907 | * | ||
908 | * Output Args: None | ||
909 | * | ||
910 | * Return: | ||
911 | * Equivalent VM physical address | ||
912 | * | ||
913 | * Locates the memory region containing the host virtual address given | ||
914 | * by hva, within the VM given by vm. When found, the equivalent | ||
915 | * VM physical address is returned. A TEST_ASSERT failure occurs if no | ||
916 | * region containing hva exists. | ||
917 | */ | ||
918 | vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) | ||
919 | { | ||
920 | struct userspace_mem_region *region; | ||
921 | for (region = vm->userspace_mem_region_head; region; | ||
922 | region = region->next) { | ||
923 | if ((hva >= region->host_mem) | ||
924 | && (hva <= (region->host_mem | ||
925 | + region->region.memory_size - 1))) | ||
926 | return (vm_paddr_t) ((uintptr_t) | ||
927 | region->region.guest_phys_addr | ||
928 | + (hva - (uintptr_t) region->host_mem)); | ||
929 | } | ||
930 | |||
931 | TEST_ASSERT(false, "No mapping to a guest physical address, " | ||
932 | "hva: %p", hva); | ||
933 | return -1; | ||
934 | } | ||
935 | |||
936 | /* VM Create IRQ Chip | ||
937 | * | ||
938 | * Input Args: | ||
939 | * vm - Virtual Machine | ||
940 | * | ||
941 | * Output Args: None | ||
942 | * | ||
943 | * Return: None | ||
944 | * | ||
945 | * Creates an interrupt controller chip for the VM specified by vm. | ||
946 | */ | ||
947 | void vm_create_irqchip(struct kvm_vm *vm) | ||
948 | { | ||
949 | int ret; | ||
950 | |||
951 | ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); | ||
952 | TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " | ||
953 | "rc: %i errno: %i", ret, errno); | ||
954 | } | ||
955 | |||
956 | /* VM VCPU State | ||
957 | * | ||
958 | * Input Args: | ||
959 | * vm - Virtual Machine | ||
960 | * vcpuid - VCPU ID | ||
961 | * | ||
962 | * Output Args: None | ||
963 | * | ||
964 | * Return: | ||
965 | * Pointer to structure that describes the state of the VCPU. | ||
966 | * | ||
967 | * Locates and returns a pointer to a structure that describes the | ||
968 | * state of the VCPU with the given vcpuid. | ||
969 | */ | ||
970 | struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) | ||
971 | { | ||
972 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
973 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
974 | |||
975 | return vcpu->state; | ||
976 | } | ||
977 | |||
978 | /* VM VCPU Run | ||
979 | * | ||
980 | * Input Args: | ||
981 | * vm - Virtual Machine | ||
982 | * vcpuid - VCPU ID | ||
983 | * | ||
984 | * Output Args: None | ||
985 | * | ||
986 | * Return: None | ||
987 | * | ||
988 | * Switch to executing the code for the VCPU given by vcpuid, within the VM | ||
989 | * given by vm. | ||
990 | */ | ||
991 | void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) | ||
992 | { | ||
993 | int ret = _vcpu_run(vm, vcpuid); | ||
994 | TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " | ||
995 | "rc: %i errno: %i", ret, errno); | ||
996 | } | ||
997 | |||
998 | int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) | ||
999 | { | ||
1000 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1001 | int rc; | ||
1002 | |||
1003 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1004 | do { | ||
1005 | rc = ioctl(vcpu->fd, KVM_RUN, NULL); | ||
1006 | } while (rc == -1 && errno == EINTR); | ||
1007 | return rc; | ||
1008 | } | ||
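Put together, a test's run loop usually looks like the sketch below; exit_reason_str() is defined further down in this file, and how guest code gets loaded is out of scope here:

	struct kvm_run *run = vcpu_state(vm, VCPU_ID);

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Unexpected exit reason: %u (%s)",
		    run->exit_reason, exit_reason_str(run->exit_reason));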
1009 | |||
1010 | /* VM VCPU Set MP State | ||
1011 | * | ||
1012 | * Input Args: | ||
1013 | * vm - Virtual Machine | ||
1014 | * vcpuid - VCPU ID | ||
1015 | * mp_state - mp_state to be set | ||
1016 | * | ||
1017 | * Output Args: None | ||
1018 | * | ||
1019 | * Return: None | ||
1020 | * | ||
1021 | * Sets the MP state of the VCPU given by vcpuid, to the state given | ||
1022 | * by mp_state. | ||
1023 | */ | ||
1024 | void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, | ||
1025 | struct kvm_mp_state *mp_state) | ||
1026 | { | ||
1027 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1028 | int ret; | ||
1029 | |||
1030 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1031 | |||
1032 | ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); | ||
1033 | TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, " | ||
1034 | "rc: %i errno: %i", ret, errno); | ||
1035 | } | ||
1036 | |||
1037 | /* VM VCPU Regs Get | ||
1038 | * | ||
1039 | * Input Args: | ||
1040 | * vm - Virtual Machine | ||
1041 | * vcpuid - VCPU ID | ||
1042 | * | ||
1043 | * Output Args: | ||
1044 | * regs - current state of VCPU regs | ||
1045 | * | ||
1046 | * Return: None | ||
1047 | * | ||
1048 | * Obtains the current register state for the VCPU specified by vcpuid | ||
1049 | * and stores it at the location given by regs. | ||
1050 | */ | ||
1051 | void vcpu_regs_get(struct kvm_vm *vm, | ||
1052 | uint32_t vcpuid, struct kvm_regs *regs) | ||
1053 | { | ||
1054 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1055 | int ret; | ||
1056 | |||
1057 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1058 | |||
1059 | /* Get the regs. */ | ||
1060 | ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); | ||
1061 | TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", | ||
1062 | ret, errno); | ||
1063 | } | ||
1064 | |||
1065 | /* VM VCPU Regs Set | ||
1066 | * | ||
1067 | * Input Args: | ||
1068 | * vm - Virtual Machine | ||
1069 | * vcpuid - VCPU ID | ||
1070 | * regs - Values to set VCPU regs to | ||
1071 | * | ||
1072 | * Output Args: None | ||
1073 | * | ||
1074 | * Return: None | ||
1075 | * | ||
1076 | * Sets the regs of the VCPU specified by vcpuid to the values | ||
1077 | * given by regs. | ||
1078 | */ | ||
1079 | void vcpu_regs_set(struct kvm_vm *vm, | ||
1080 | uint32_t vcpuid, struct kvm_regs *regs) | ||
1081 | { | ||
1082 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1083 | int ret; | ||
1084 | |||
1085 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1086 | |||
1087 | /* Set the regs. */ | ||
1088 | ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); | ||
1089 | TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", | ||
1090 | ret, errno); | ||
1091 | } | ||
1092 | |||
1093 | void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, | ||
1094 | struct kvm_vcpu_events *events) | ||
1095 | { | ||
1096 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1097 | int ret; | ||
1098 | |||
1099 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1100 | |||
1101 | /* Get the events. */ | ||
1102 | ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); | ||
1103 | TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS failed, rc: %i errno: %i", | ||
1104 | ret, errno); | ||
1105 | } | ||
1106 | |||
1107 | void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, | ||
1108 | struct kvm_vcpu_events *events) | ||
1109 | { | ||
1110 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1111 | int ret; | ||
1112 | |||
1113 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1114 | |||
1115 | /* Set the events. */ | ||
1116 | ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); | ||
1117 | TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS failed, rc: %i errno: %i", | ||
1118 | ret, errno); | ||
1119 | } | ||
1120 | |||
1121 | /* VM VCPU Args Set | ||
1122 | * | ||
1123 | * Input Args: | ||
1124 | * vm - Virtual Machine | ||
1125 | * vcpuid - VCPU ID | ||
1126 | * num - number of arguments | ||
1127 | * ... - arguments, each of type uint64_t | ||
1128 | * | ||
1129 | * Output Args: None | ||
1130 | * | ||
1131 | * Return: None | ||
1132 | * | ||
1133 | * Sets the first num function input arguments to the values | ||
1134 | * given as variable args. Each of the variable args is expected to | ||
1135 | * be of type uint64_t. | ||
1136 | */ | ||
1137 | void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) | ||
1138 | { | ||
1139 | va_list ap; | ||
1140 | struct kvm_regs regs; | ||
1141 | |||
1142 | TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" | ||
1143 | " num: %u\n", | ||
1144 | num); | ||
1145 | |||
1146 | va_start(ap, num); | ||
1147 | vcpu_regs_get(vm, vcpuid, ®s); | ||
1148 | |||
1149 | if (num >= 1) | ||
1150 | regs.rdi = va_arg(ap, uint64_t); | ||
1151 | |||
1152 | if (num >= 2) | ||
1153 | regs.rsi = va_arg(ap, uint64_t); | ||
1154 | |||
1155 | if (num >= 3) | ||
1156 | regs.rdx = va_arg(ap, uint64_t); | ||
1157 | |||
1158 | if (num >= 4) | ||
1159 | regs.rcx = va_arg(ap, uint64_t); | ||
1160 | |||
1161 | if (num >= 5) | ||
1162 | regs.r8 = va_arg(ap, uint64_t); | ||
1163 | |||
1164 | if (num >= 6) | ||
1165 | regs.r9 = va_arg(ap, uint64_t); | ||
1166 | |||
1167 | vcpu_regs_set(vm, vcpuid, ®s); | ||
1168 | va_end(ap); | ||
1169 | } | ||
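The register order (RDI, RSI, RDX, RCX, R8, R9) matches the x86_64 SysV calling convention, so a guest entry point written as an ordinary C function receives the values as its first six parameters. A sketch, with hypothetical guest-side names:

	/* Guest side: void guest_code(uint64_t token, uint64_t iters); */
	vcpu_args_set(vm, VCPU_ID, 2, 0xdeadbeefULL, 10ULL);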
1170 | |||
1171 | /* VM VCPU System Regs Get | ||
1172 | * | ||
1173 | * Input Args: | ||
1174 | * vm - Virtual Machine | ||
1175 | * vcpuid - VCPU ID | ||
1176 | * | ||
1177 | * Output Args: | ||
1178 | * sregs - current state of VCPU system regs | ||
1179 | * | ||
1180 | * Return: None | ||
1181 | * | ||
1182 | * Obtains the current system register state for the VCPU specified by | ||
1183 | * vcpuid and stores it at the location given by sregs. | ||
1184 | */ | ||
1185 | void vcpu_sregs_get(struct kvm_vm *vm, | ||
1186 | uint32_t vcpuid, struct kvm_sregs *sregs) | ||
1187 | { | ||
1188 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1189 | int ret; | ||
1190 | |||
1191 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1192 | |||
1193 | /* Get the sregs. */ | ||
1195 | ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); | ||
1196 | TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", | ||
1197 | ret, errno); | ||
1198 | } | ||
1199 | |||
1200 | /* VM VCPU System Regs Set | ||
1201 | * | ||
1202 | * Input Args: | ||
1203 | * vm - Virtual Machine | ||
1204 | * vcpuid - VCPU ID | ||
1205 | * sregs - Values to set VCPU system regs to | ||
1206 | * | ||
1207 | * Output Args: None | ||
1208 | * | ||
1209 | * Return: None | ||
1210 | * | ||
1211 | * Sets the system regs of the VCPU specified by vcpuid to the values | ||
1212 | * given by sregs. | ||
1213 | */ | ||
1214 | void vcpu_sregs_set(struct kvm_vm *vm, | ||
1215 | uint32_t vcpuid, struct kvm_sregs *sregs) | ||
1216 | { | ||
1217 | int ret = _vcpu_sregs_set(vm, vcpuid, sregs); | ||
1218 | TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " | ||
1219 | "rc: %i errno: %i", ret, errno); | ||
1220 | } | ||
1221 | |||
1222 | int _vcpu_sregs_set(struct kvm_vm *vm, | ||
1223 | uint32_t vcpuid, struct kvm_sregs *sregs) | ||
1224 | { | ||
1225 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1226 | int ret; | ||
1227 | |||
1228 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1229 | |||
1230 | /* Set the sregs. */ | ||
1231 | return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); | ||
1232 | } | ||
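vcpu_sregs_set() asserts success, while the bare _vcpu_sregs_set() returns the raw ioctl result so a test can feed KVM state it expects to be rejected, which is what set_sregs_test exercises. The common read-modify-write pattern is a sketch like:

	struct kvm_sregs sregs;

	/* Writing back unmodified state must always succeed. */
	vcpu_sregs_get(vm, VCPU_ID, &sregs);
	vcpu_sregs_set(vm, VCPU_ID, &sregs);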
1233 | |||
1234 | /* VCPU Ioctl | ||
1235 | * | ||
1236 | * Input Args: | ||
1237 | * vm - Virtual Machine | ||
1238 | * vcpuid - VCPU ID | ||
1239 | * cmd - Ioctl number | ||
1240 | * arg - Argument to pass to the ioctl | ||
1241 | * | ||
1242 | * Return: None | ||
1243 | * | ||
1244 | * Issues an arbitrary ioctl on a VCPU fd. | ||
1245 | */ | ||
1246 | void vcpu_ioctl(struct kvm_vm *vm, | ||
1247 | uint32_t vcpuid, unsigned long cmd, void *arg) | ||
1248 | { | ||
1249 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
1250 | int ret; | ||
1251 | |||
1252 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
1253 | |||
1254 | ret = ioctl(vcpu->fd, cmd, arg); | ||
1255 | TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", | ||
1256 | cmd, ret, errno, strerror(errno)); | ||
1257 | } | ||
1258 | |||
1259 | /* VM Ioctl | ||
1260 | * | ||
1261 | * Input Args: | ||
1262 | * vm - Virtual Machine | ||
1263 | * cmd - Ioctl number | ||
1264 | * arg - Argument to pass to the ioctl | ||
1265 | * | ||
1266 | * Return: None | ||
1267 | * | ||
1268 | * Issues an arbitrary ioctl on a VM fd. | ||
1269 | */ | ||
1270 | void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) | ||
1271 | { | ||
1272 | int ret; | ||
1273 | |||
1274 | ret = ioctl(vm->fd, cmd, arg); | ||
1275 | TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", | ||
1276 | cmd, ret, errno, strerror(errno)); | ||
1277 | } | ||
1278 | |||
1279 | /* VM Dump | ||
1280 | * | ||
1281 | * Input Args: | ||
1282 | * vm - Virtual Machine | ||
1283 | * indent - Left margin indent amount | ||
1284 | * | ||
1285 | * Output Args: | ||
1286 | * stream - Output FILE stream | ||
1287 | * | ||
1288 | * Return: None | ||
1289 | * | ||
1290 | * Dumps the current state of the VM given by vm, to the FILE stream | ||
1291 | * given by stream. | ||
1292 | */ | ||
1293 | void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) | ||
1294 | { | ||
1295 | struct userspace_mem_region *region; | ||
1296 | struct vcpu *vcpu; | ||
1297 | |||
1298 | fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); | ||
1299 | fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); | ||
1300 | fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); | ||
1301 | fprintf(stream, "%*sMem Regions:\n", indent, ""); | ||
1302 | for (region = vm->userspace_mem_region_head; region; | ||
1303 | region = region->next) { | ||
1304 | fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " | ||
1305 | "host_virt: %p\n", indent + 2, "", | ||
1306 | (uint64_t) region->region.guest_phys_addr, | ||
1307 | (uint64_t) region->region.memory_size, | ||
1308 | region->host_mem); | ||
1309 | fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); | ||
1310 | sparsebit_dump(stream, region->unused_phy_pages, 0); | ||
1311 | } | ||
1312 | fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); | ||
1313 | sparsebit_dump(stream, vm->vpages_mapped, indent + 2); | ||
1314 | fprintf(stream, "%*spgd_created: %u\n", indent, "", | ||
1315 | vm->pgd_created); | ||
1316 | if (vm->pgd_created) { | ||
1317 | fprintf(stream, "%*sVirtual Translation Tables:\n", | ||
1318 | indent + 2, ""); | ||
1319 | virt_dump(stream, vm, indent + 4); | ||
1320 | } | ||
1321 | fprintf(stream, "%*sVCPUs:\n", indent, ""); | ||
1322 | for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next) | ||
1323 | vcpu_dump(stream, vm, vcpu->id, indent + 2); | ||
1324 | } | ||
1325 | |||
1326 | /* VM VCPU Dump | ||
1327 | * | ||
1328 | * Input Args: | ||
1329 | * vm - Virtual Machine | ||
1330 | * vcpuid - VCPU ID | ||
1331 | * indent - Left margin indent amount | ||
1332 | * | ||
1333 | * Output Args: | ||
1334 | * stream - Output FILE stream | ||
1335 | * | ||
1336 | * Return: None | ||
1337 | * | ||
1338 | * Dumps the current state of the VCPU specified by vcpuid, within the VM | ||
1339 | * given by vm, to the FILE stream given by stream. | ||
1340 | */ | ||
1341 | void vcpu_dump(FILE *stream, struct kvm_vm *vm, | ||
1342 | uint32_t vcpuid, uint8_t indent) | ||
1343 | { | ||
1344 | struct kvm_regs regs; | ||
1345 | struct kvm_sregs sregs; | ||
1346 | |||
1347 | fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); | ||
1348 | |||
1349 | fprintf(stream, "%*sregs:\n", indent + 2, ""); | ||
1350 | vcpu_regs_get(vm, vcpuid, ®s); | ||
1351 | regs_dump(stream, ®s, indent + 4); | ||
1352 | |||
1353 | fprintf(stream, "%*ssregs:\n", indent + 2, ""); | ||
1354 | vcpu_sregs_get(vm, vcpuid, &sregs); | ||
1355 | sregs_dump(stream, &sregs, indent + 4); | ||
1356 | } | ||
1357 | |||
1358 | /* Known KVM exit reasons */ | ||
1359 | static struct exit_reason { | ||
1360 | unsigned int reason; | ||
1361 | const char *name; | ||
1362 | } exit_reasons_known[] = { | ||
1363 | {KVM_EXIT_UNKNOWN, "UNKNOWN"}, | ||
1364 | {KVM_EXIT_EXCEPTION, "EXCEPTION"}, | ||
1365 | {KVM_EXIT_IO, "IO"}, | ||
1366 | {KVM_EXIT_HYPERCALL, "HYPERCALL"}, | ||
1367 | {KVM_EXIT_DEBUG, "DEBUG"}, | ||
1368 | {KVM_EXIT_HLT, "HLT"}, | ||
1369 | {KVM_EXIT_MMIO, "MMIO"}, | ||
1370 | {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, | ||
1371 | {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, | ||
1372 | {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, | ||
1373 | {KVM_EXIT_INTR, "INTR"}, | ||
1374 | {KVM_EXIT_SET_TPR, "SET_TPR"}, | ||
1375 | {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, | ||
1376 | {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, | ||
1377 | {KVM_EXIT_S390_RESET, "S390_RESET"}, | ||
1378 | {KVM_EXIT_DCR, "DCR"}, | ||
1379 | {KVM_EXIT_NMI, "NMI"}, | ||
1380 | {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, | ||
1381 | {KVM_EXIT_OSI, "OSI"}, | ||
1382 | {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, | ||
1383 | #ifdef KVM_EXIT_MEMORY_NOT_PRESENT | ||
1384 | {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, | ||
1385 | #endif | ||
1386 | }; | ||
1387 | |||
1388 | /* Exit Reason String | ||
1389 | * | ||
1390 | * Input Args: | ||
1391 | * exit_reason - Exit reason | ||
1392 | * | ||
1393 | * Output Args: None | ||
1394 | * | ||
1395 | * Return: | ||
1396 | * Constant string pointer describing the exit reason. | ||
1397 | * | ||
1398 | * Locates and returns a constant string that describes the KVM exit | ||
1399 | * reason given by exit_reason. If no such string is found, a constant | ||
1400 | * string of "Unknown" is returned. | ||
1401 | */ | ||
1402 | const char *exit_reason_str(unsigned int exit_reason) | ||
1403 | { | ||
1404 | unsigned int n1; | ||
1405 | |||
1406 | for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { | ||
1407 | if (exit_reason == exit_reasons_known[n1].reason) | ||
1408 | return exit_reasons_known[n1].name; | ||
1409 | } | ||
1410 | |||
1411 | return "Unknown"; | ||
1412 | } | ||
1413 | |||
1414 | /* Physical Page Allocate | ||
1415 | * | ||
1416 | * Input Args: | ||
1417 | * vm - Virtual Machine | ||
1418 | * paddr_min - Physical address minimum | ||
1419 | * memslot - Memory region to allocate page from | ||
1420 | * | ||
1421 | * Output Args: None | ||
1422 | * | ||
1423 | * Return: | ||
1424 | * Starting physical address | ||
1425 | * | ||
1426 | * Within the VM specified by vm, locates an available physical page | ||
1427 | * at or above paddr_min. If found, the page is marked as in use | ||
1428 | * and its address is returned. A TEST_ASSERT failure occurs if no | ||
1429 | * page is available at or above paddr_min. | ||
1430 | */ | ||
1431 | vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, | ||
1432 | vm_paddr_t paddr_min, uint32_t memslot) | ||
1433 | { | ||
1434 | struct userspace_mem_region *region; | ||
1435 | sparsebit_idx_t pg; | ||
1436 | |||
1437 | TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " | ||
1438 | "not divisable by page size.\n" | ||
1439 | " paddr_min: 0x%lx page_size: 0x%x", | ||
1440 | paddr_min, vm->page_size); | ||
1441 | |||
1442 | /* Locate memory region. */ | ||
1443 | region = memslot2region(vm, memslot); | ||
1444 | |||
1445 | /* Locate next available physical page at or above paddr_min. */ | ||
1446 | pg = paddr_min >> vm->page_shift; | ||
1447 | |||
1448 | if (!sparsebit_is_set(region->unused_phy_pages, pg)) { | ||
1449 | pg = sparsebit_next_set(region->unused_phy_pages, pg); | ||
1450 | if (pg == 0) { | ||
1451 | fprintf(stderr, "No guest physical page available, " | ||
1452 | "paddr_min: 0x%lx page_size: 0x%x memslot: %u", | ||
1453 | paddr_min, vm->page_size, memslot); | ||
1454 | fputs("---- vm dump ----\n", stderr); | ||
1455 | vm_dump(stderr, vm, 2); | ||
1456 | abort(); | ||
1457 | } | ||
1458 | } | ||
1459 | |||
1460 | /* Specify page as in use and return its address. */ | ||
1461 | sparsebit_clear(region->unused_phy_pages, pg); | ||
1462 | |||
1463 | return pg * vm->page_size; | ||
1464 | } | ||
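A usage sketch, assuming a VM whose memslot 0 exists and whose page size is 4 KiB (both hypothetical values here):

	/* Claim one physical page at or above 1 MiB from memslot 0. */
	vm_paddr_t paddr = vm_phy_page_alloc(vm, 0x100000, 0);

Note that paddr_min must itself be page aligned, per the TEST_ASSERT at the top of the function.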
1465 | |||
1466 | /* Address Guest Virtual to Host Virtual | ||
1467 | * | ||
1468 | * Input Args: | ||
1469 | * vm - Virtual Machine | ||
1470 | * gva - VM virtual address | ||
1471 | * | ||
1472 | * Output Args: None | ||
1473 | * | ||
1474 | * Return: | ||
1475 | * Equivalent host virtual address | ||
1476 | */ | ||
1477 | void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) | ||
1478 | { | ||
1479 | return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); | ||
1480 | } | ||
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h new file mode 100644 index 000000000000..a0bd1980c81c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * tools/testing/selftests/kvm/lib/kvm_util_internal.h | ||
3 | * | ||
4 | * Copyright (C) 2018, Google LLC. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
7 | */ | ||
8 | |||
9 | #ifndef KVM_UTIL_INTERNAL_H | ||
10 | #define KVM_UTIL_INTERNAL_H 1 | ||
11 | |||
12 | #include "sparsebit.h" | ||
13 | |||
14 | #ifndef BITS_PER_BYTE | ||
15 | #define BITS_PER_BYTE 8 | ||
16 | #endif | ||
17 | |||
18 | #ifndef BITS_PER_LONG | ||
19 | #define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) | ||
20 | #endif | ||
21 | |||
22 | #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) | ||
23 | #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) | ||
24 | |||
25 | /* Concrete definition of struct kvm_vm. */ | ||
26 | struct userspace_mem_region { | ||
27 | struct userspace_mem_region *next, *prev; | ||
28 | struct kvm_userspace_memory_region region; | ||
29 | struct sparsebit *unused_phy_pages; | ||
30 | int fd; | ||
31 | off_t offset; | ||
32 | void *host_mem; | ||
33 | void *mmap_start; | ||
34 | size_t mmap_size; | ||
35 | }; | ||
36 | |||
37 | struct vcpu { | ||
38 | struct vcpu *next, *prev; | ||
39 | uint32_t id; | ||
40 | int fd; | ||
41 | struct kvm_run *state; | ||
42 | }; | ||
43 | |||
44 | struct kvm_vm { | ||
45 | int mode; | ||
46 | int fd; | ||
47 | unsigned int page_size; | ||
48 | unsigned int page_shift; | ||
49 | uint64_t max_gfn; | ||
50 | struct vcpu *vcpu_head; | ||
51 | struct userspace_mem_region *userspace_mem_region_head; | ||
52 | struct sparsebit *vpages_valid; | ||
53 | struct sparsebit *vpages_mapped; | ||
54 | bool pgd_created; | ||
55 | vm_paddr_t pgd; | ||
56 | }; | ||
57 | |||
58 | struct vcpu *vcpu_find(struct kvm_vm *vm, | ||
59 | uint32_t vcpuid); | ||
60 | void vcpu_setup(struct kvm_vm *vm, int vcpuid); | ||
61 | void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); | ||
62 | void regs_dump(FILE *stream, struct kvm_regs *regs, | ||
63 | uint8_t indent); | ||
64 | void sregs_dump(FILE *stream, struct kvm_sregs *sregs, | ||
65 | uint8_t indent); | ||
66 | |||
67 | #endif | ||
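The structures above are plain doubly linked lists rooted in struct kvm_vm. As an illustration of the intended traversal pattern, a plausible definition of vcpu_find() consistent with this header (the authoritative definition lives in kvm_util.c and may differ):

	struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
	{
		struct vcpu *vcpup;

		for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
			if (vcpup->id == vcpuid)
				return vcpup;
		}

		return NULL;
	}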
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c new file mode 100644 index 000000000000..0c5cf3e0cb6f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/sparsebit.c | |||
@@ -0,0 +1,2087 @@ | |||
1 | /* | ||
2 | * Sparse bit array | ||
3 | * | ||
4 | * Copyright (C) 2018, Google LLC. | ||
5 | * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver) | ||
6 | * | ||
7 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
8 | * | ||
9 | * This library provides functions to support a memory efficient bit array, | ||
10 | * with an index size of 2^64. A sparsebit array is allocated through | ||
11 | * the use of sparsebit_alloc() and free'd via sparsebit_free(), | ||
12 | * such as in the following: | ||
13 | * | ||
14 | * struct sparsebit *s; | ||
15 | * s = sparsebit_alloc(); | ||
16 | * sparsebit_free(&s); | ||
17 | * | ||
18 | * Note that sparsebit_free() takes a pointer to the caller's | ||
19 | * pointer to the struct sparsebit. This is so that | ||
20 | * sparsebit_free() is able to poison the caller's pointer | ||
21 | * (e.g. set it to NULL) before returning, preventing further | ||
22 | * use of the free'd sparsebit array. | ||
23 | * | ||
24 | * Between the return of sparsebit_alloc() and the call of | ||
25 | * sparsebit_free(), there are multiple query and modifying operations | ||
26 | * that can be performed on the allocated sparsebit array. All of | ||
27 | * these operations take as a parameter the value returned from | ||
28 | * sparsebit_alloc() and most also take a bit index. Frequently | ||
29 | * used routines include: | ||
30 | * | ||
31 | * ---- Query Operations | ||
32 | * sparsebit_is_set(s, idx) | ||
33 | * sparsebit_is_clear(s, idx) | ||
34 | * sparsebit_any_set(s) | ||
35 | * sparsebit_first_set(s) | ||
36 | * sparsebit_next_set(s, prev_idx) | ||
37 | * | ||
38 | * ---- Modifying Operations | ||
39 | * sparsebit_set(s, idx) | ||
40 | * sparsebit_clear(s, idx) | ||
41 | * sparsebit_set_num(s, idx, num); | ||
42 | * sparsebit_clear_num(s, idx, num); | ||
43 | * | ||
44 | * A common operation is to iterate over all the bits set in a | ||
45 | * sparsebit array. This can be done via code with the following structure: | ||
46 | * | ||
47 | * sparsebit_idx_t idx; | ||
48 | * if (sparsebit_any_set(s)) { | ||
49 | * idx = sparsebit_first_set(s); | ||
50 | * do { | ||
51 | * ... | ||
52 | * idx = sparsebit_next_set(s, idx); | ||
53 | * } while (idx != 0); | ||
54 | * } | ||
55 | * | ||
56 | * The index of the first set bit needs to be obtained via | ||
57 | * sparsebit_first_set(), because sparsebit_next_set() needs the | ||
58 | * index of the previously found set bit. The sparsebit_idx_t type | ||
59 | * is unsigned, so there is no previous index before 0 that is | ||
60 | * available. Also, the call to sparsebit_first_set() is not made | ||
61 | * unless there is at least 1 bit in the array set, because | ||
62 | * sparsebit_first_set() aborts if it is called with no bits set. | ||
63 | * It is the caller's responsibility to ensure that the | ||
64 | * sparsebit array has at least a single bit set before calling | ||
65 | * sparsebit_first_set(). | ||
66 | * | ||
67 | * ==== Implementation Overview ==== | ||
68 | * For the most part the internal implementation of sparsebit is | ||
69 | * opaque to the caller. One important implementation detail that the | ||
70 | * caller may need to be aware of is the spatial complexity of the | ||
71 | * implementation. This implementation of a sparsebit array is not | ||
72 | * only sparse, in that it uses memory proportional to the number of | ||
73 | * contiguous ranges of set bits. It is also efficient in memory usage | ||
74 | * when most of the bits are set. | ||
75 | * | ||
76 | * At a high-level the state of the bit settings are maintained through | ||
77 | * the use of a binary-search tree, where each node contains at least | ||
78 | * the following members: | ||
79 | * | ||
80 | * typedef uint64_t sparsebit_idx_t; | ||
81 | * typedef uint64_t sparsebit_num_t; | ||
82 | * | ||
83 | * sparsebit_idx_t idx; | ||
84 | * uint32_t mask; | ||
85 | * sparsebit_num_t num_after; | ||
86 | * | ||
87 | * The idx member contains the bit index of the first bit described by this | ||
88 | * node, while the mask member stores the settings of the first 32 bits. | ||
89 | * The setting of the bit at idx + n, where 0 <= n < 32, is located in the | ||
90 | * mask member at 1 << n. | ||
91 | * | ||
92 | * Nodes are sorted by idx and the bits described by two nodes will never | ||
93 | * overlap. The idx member is always aligned to the mask size, i.e. a | ||
94 | * multiple of 32. | ||
95 | * | ||
96 | * Beyond a typical implementation, the nodes in this implementation also | ||
97 | * contain a member named num_after. The num_after member holds the | ||
98 | * number of bits immediately after the mask bits that are contiguously set. | ||
99 | * The use of the num_after member allows this implementation to efficiently | ||
100 | * represent cases where most bits are set. For example, the case of all | ||
101 | * but the last two bits set is represented by the following two nodes: | ||
102 | * | ||
103 | * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0 | ||
104 | * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0 | ||
105 | * | ||
106 | * ==== Invariants ==== | ||
107 | * This implementation uses the following invariants: | ||
108 | * | ||
109 | * + Nodes are only used to represent bits that are set. | ||
110 | * Nodes with a mask of 0 and num_after of 0 are not allowed. | ||
111 | * | ||
112 | * + Sum of bits set in all the nodes is equal to the value of | ||
113 | * the struct sparsebit_pvt num_set member. | ||
114 | * | ||
115 | * + The setting of at least one bit is always described in a node's | ||
116 | * mask (mask >= 1). | ||
117 | * | ||
118 | * + A node with all mask bits set only occurs when the last bit | ||
119 | * described by the previous node is not equal to this node's | ||
120 | * starting index - 1. All such occurrences of this condition are | ||
121 | * avoided by moving the setting of the node's mask bits into | ||
122 | * the previous node's num_after setting. | ||
123 | * | ||
124 | * + A node's starting index is evenly divisible by the number of bits | ||
125 | * within a node's mask member. | ||
126 | * | ||
127 | * + Nodes never represent a range of bits that wrap around the | ||
128 | * highest supported index. | ||
129 | * | ||
130 | * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1 | ||
131 | * | ||
132 | * As a consequence of the above, the num_after member of a node | ||
133 | * will always be <=: | ||
134 | * | ||
135 | * maximum_index - nodes_starting_index - number_of_mask_bits | ||
136 | * | ||
137 | * + Nodes within the binary search tree are sorted based on each | ||
138 | * node's starting index. | ||
139 | * | ||
140 | * + The ranges of bits described by any two nodes do not overlap. The | ||
141 | * range of bits described by a single node is: | ||
142 | * | ||
143 | * start: node->idx | ||
144 | * end (inclusive): node->idx + MASK_BITS + node->num_after - 1; | ||
145 | * | ||
146 | * Note, at times these invariants are temporarily violated for a | ||
147 | * specific portion of the code. For example, when setting a mask | ||
148 | * bit, there is a small delay between when the mask bit is set and the | ||
149 | * value in the struct sparsebit_pvt num_set member is updated. Other | ||
150 | * temporary violations occur when node_split() is called with a specified | ||
151 | * index and ensures that a node whose mask represents the bit | ||
152 | * at the specified index exists. At times, to do this, node_split() | ||
153 | * must split an existing node into two nodes or create a node that | ||
154 | * has no bits set. Such temporary violations must be corrected before | ||
155 | * returning to the caller. These corrections are typically performed | ||
156 | * by the local function node_reduce(). | ||
157 | */ | ||
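Tying the interface together, a minimal usage sketch that mirrors the iteration pattern documented above (every function named here is defined in this file):

	struct sparsebit *s = sparsebit_alloc();
	sparsebit_idx_t idx;

	sparsebit_set_num(s, 10, 5);		/* set bits 10..14 */

	if (sparsebit_any_set(s)) {
		idx = sparsebit_first_set(s);	/* 10 */
		do {
			/* ... use the set bit at idx ... */
			idx = sparsebit_next_set(s, idx);
		} while (idx != 0);		/* 0 => no more set bits */
	}

	sparsebit_free(&s);			/* also NULLs the pointer */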
158 | |||
159 | #include "test_util.h" | ||
160 | #include "sparsebit.h" | ||
161 | #include <limits.h> | ||
162 | #include <assert.h> | ||
163 | |||
164 | #define DUMP_LINE_MAX 100 /* Does not include indent amount */ | ||
165 | |||
166 | typedef uint32_t mask_t; | ||
167 | #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) | ||
168 | |||
169 | struct node { | ||
170 | struct node *parent; | ||
171 | struct node *left; | ||
172 | struct node *right; | ||
173 | sparsebit_idx_t idx; /* index of least-significant bit in mask */ | ||
174 | sparsebit_num_t num_after; /* num contiguously set after mask */ | ||
175 | mask_t mask; | ||
176 | }; | ||
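To restate the node encoding as code, the illustrative (unused) predicate below captures exactly which bits a node describes as set; node_find() and sparsebit_is_set() later in this file open-code the same test:

	/* Illustration only: does node n describe the bit at idx as set? */
	static bool node_describes_set(const struct node *n, sparsebit_idx_t idx)
	{
		/* Bits n->idx .. n->idx + MASK_BITS - 1 live in the mask. */
		if (idx >= n->idx && idx < n->idx + MASK_BITS)
			return n->mask & ((mask_t) 1 << (idx - n->idx));

		/* The num_after bits just past the mask are all set. */
		return idx >= n->idx + MASK_BITS &&
		       idx <= n->idx + MASK_BITS + n->num_after - 1;
	}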
177 | |||
178 | struct sparsebit { | ||
179 | /* | ||
180 | * Points to root node of the binary search | ||
181 | * tree. Equal to NULL when no bits are set in | ||
182 | * the entire sparsebit array. | ||
183 | */ | ||
184 | struct node *root; | ||
185 | |||
186 | /* | ||
187 | * A redundant count of the total number of bits set. Used for | ||
188 | * diagnostic purposes and to change the time complexity of | ||
189 | * sparsebit_num_set() from O(n) to O(1). | ||
190 | * Note: Due to overflow, a value of 0 means none or all set. | ||
191 | */ | ||
192 | sparsebit_num_t num_set; | ||
193 | }; | ||
194 | |||
195 | /* Returns the number of set bits described by the settings | ||
196 | * of the node pointed to by nodep. | ||
197 | */ | ||
198 | static sparsebit_num_t node_num_set(struct node *nodep) | ||
199 | { | ||
200 | return nodep->num_after + __builtin_popcount(nodep->mask); | ||
201 | } | ||
202 | |||
203 | /* Returns a pointer to the node that describes the | ||
204 | * lowest bit index. | ||
205 | */ | ||
206 | static struct node *node_first(struct sparsebit *s) | ||
207 | { | ||
208 | struct node *nodep; | ||
209 | |||
210 | for (nodep = s->root; nodep && nodep->left; nodep = nodep->left) | ||
211 | ; | ||
212 | |||
213 | return nodep; | ||
214 | } | ||
215 | |||
216 | /* Returns a pointer to the node that describes the | ||
217 | * lowest bit index > the index of the node pointed to by np. | ||
218 | * Returns NULL if no node with a higher index exists. | ||
219 | */ | ||
220 | static struct node *node_next(struct sparsebit *s, struct node *np) | ||
221 | { | ||
222 | struct node *nodep = np; | ||
223 | |||
224 | /* | ||
225 | * If current node has a right child, next node is the left-most | ||
226 | * of the right child. | ||
227 | */ | ||
228 | if (nodep->right) { | ||
229 | for (nodep = nodep->right; nodep->left; nodep = nodep->left) | ||
230 | ; | ||
231 | return nodep; | ||
232 | } | ||
233 | |||
234 | /* | ||
235 | * No right child. Go up until node is left child of a parent. | ||
236 | * That parent is then the next node. | ||
237 | */ | ||
238 | while (nodep->parent && nodep == nodep->parent->right) | ||
239 | nodep = nodep->parent; | ||
240 | |||
241 | return nodep->parent; | ||
242 | } | ||
243 | |||
244 | /* Searches for and returns a pointer to the node that describes the | ||
245 | * highest index < the index of the node pointed to by np. | ||
246 | * Returns NULL if no node with a lower index exists. | ||
247 | */ | ||
248 | static struct node *node_prev(struct sparsebit *s, struct node *np) | ||
249 | { | ||
250 | struct node *nodep = np; | ||
251 | |||
252 | /* | ||
253 | * If current node has a left child, previous node is the right-most | ||
254 | * of the left child. | ||
255 | */ | ||
256 | if (nodep->left) { | ||
257 | for (nodep = nodep->left; nodep->right; nodep = nodep->right) | ||
258 | ; | ||
259 | return nodep; | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * No left child. Go up until node is right child of a parent. | ||
264 | * That parent is then the previous node. | ||
265 | */ | ||
266 | while (nodep->parent && nodep == nodep->parent->left) | ||
267 | nodep = nodep->parent; | ||
268 | |||
269 | return nodep->parent; | ||
270 | } | ||
271 | |||
272 | |||
273 | /* Allocates space to hold a copy of the node sub-tree pointed to by | ||
274 | * subtree and duplicates the bit settings to the newly allocated nodes. | ||
275 | * Returns the newly allocated copy of subtree. | ||
276 | */ | ||
277 | static struct node *node_copy_subtree(struct node *subtree) | ||
278 | { | ||
279 | struct node *root; | ||
280 | |||
281 | /* Duplicate the node at the root of the subtree */ | ||
282 | root = calloc(1, sizeof(*root)); | ||
283 | if (!root) { | ||
284 | perror("calloc"); | ||
285 | abort(); | ||
286 | } | ||
287 | |||
288 | root->idx = subtree->idx; | ||
289 | root->mask = subtree->mask; | ||
290 | root->num_after = subtree->num_after; | ||
291 | |||
292 | /* As needed, recursively duplicate the left and right subtrees */ | ||
293 | if (subtree->left) { | ||
294 | root->left = node_copy_subtree(subtree->left); | ||
295 | root->left->parent = root; | ||
296 | } | ||
297 | |||
298 | if (subtree->right) { | ||
299 | root->right = node_copy_subtree(subtree->right); | ||
300 | root->right->parent = root; | ||
301 | } | ||
302 | |||
303 | return root; | ||
304 | } | ||
305 | |||
306 | /* Searches for and returns a pointer to the node that describes the setting | ||
307 | * of the bit given by idx. A node describes the setting of a bit if its | ||
308 | * index is within the bits described by the mask bits or the number of | ||
309 | * contiguous bits set after the mask. Returns NULL if there is no such node. | ||
310 | */ | ||
311 | static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) | ||
312 | { | ||
313 | struct node *nodep; | ||
314 | |||
315 | /* Find the node that describes the setting of the bit at idx */ | ||
316 | for (nodep = s->root; nodep; | ||
317 | nodep = nodep->idx > idx ? nodep->left : nodep->right) { | ||
318 | if (idx >= nodep->idx && | ||
319 | idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) | ||
320 | break; | ||
321 | } | ||
322 | |||
323 | return nodep; | ||
324 | } | ||
325 | |||
326 | /* Entry Requirements: | ||
327 | * + A node that describes the setting of idx is not already present. | ||
328 | * | ||
329 | * Adds a new node to describe the setting of the bit at the index given | ||
330 | * by idx. Returns a pointer to the newly added node. | ||
331 | * | ||
332 | * TODO(lhuemill): Degenerate cases cause the tree to get unbalanced. | ||
333 | */ | ||
334 | static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) | ||
335 | { | ||
336 | struct node *nodep, *parentp, *prev; | ||
337 | |||
338 | /* Allocate and initialize the new node. */ | ||
339 | nodep = calloc(1, sizeof(*nodep)); | ||
340 | if (!nodep) { | ||
341 | perror("calloc"); | ||
342 | abort(); | ||
343 | } | ||
344 | |||
345 | nodep->idx = idx & -MASK_BITS; | ||
346 | |||
347 | /* If no nodes, set it up as the root node. */ | ||
348 | if (!s->root) { | ||
349 | s->root = nodep; | ||
350 | return nodep; | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Find the parent where the new node should be attached | ||
355 | * and add the node there. | ||
356 | */ | ||
357 | parentp = s->root; | ||
358 | while (true) { | ||
359 | if (idx < parentp->idx) { | ||
360 | if (!parentp->left) { | ||
361 | parentp->left = nodep; | ||
362 | nodep->parent = parentp; | ||
363 | break; | ||
364 | } | ||
365 | parentp = parentp->left; | ||
366 | } else { | ||
367 | assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1); | ||
368 | if (!parentp->right) { | ||
369 | parentp->right = nodep; | ||
370 | nodep->parent = parentp; | ||
371 | break; | ||
372 | } | ||
373 | parentp = parentp->right; | ||
374 | } | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * Do the num_after bits of the previous node overlap with the | ||
379 | * mask of the new node? If so, set the bits in the new node's | ||
380 | * mask and reduce the previous node's num_after. | ||
381 | */ | ||
382 | prev = node_prev(s, nodep); | ||
383 | while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) { | ||
384 | unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1) | ||
385 | - nodep->idx; | ||
386 | assert(prev->num_after > 0); | ||
387 | assert(n1 < MASK_BITS); | ||
388 | assert(!(nodep->mask & ((mask_t) 1 << n1))); | ||
389 | nodep->mask |= ((mask_t) 1 << n1); | ||
390 | prev->num_after--; | ||
391 | } | ||
392 | |||
393 | return nodep; | ||
394 | } | ||
395 | |||
396 | /* Returns whether all the bits in the sparsebit array are set. */ | ||
397 | bool sparsebit_all_set(struct sparsebit *s) | ||
398 | { | ||
399 | /* | ||
400 | * If any nodes exist, there must be at least one bit set. The | ||
401 | * only case where a bit is set yet the total num_set is 0 is when | ||
402 | * all bits are set, because num_set overflows back to 0. | ||
403 | */ | ||
404 | return s->root && s->num_set == 0; | ||
405 | } | ||
406 | |||
407 | /* Clears all bits described by the node pointed to by nodep, then | ||
408 | * removes the node. | ||
409 | */ | ||
410 | static void node_rm(struct sparsebit *s, struct node *nodep) | ||
411 | { | ||
412 | struct node *tmp; | ||
413 | sparsebit_num_t num_set; | ||
414 | |||
415 | num_set = node_num_set(nodep); | ||
416 | assert(s->num_set >= num_set || sparsebit_all_set(s)); | ||
417 | s->num_set -= node_num_set(nodep); | ||
418 | |||
419 | /* Have both left and right child */ | ||
420 | if (nodep->left && nodep->right) { | ||
421 | /* | ||
422 | * Move the left subtree to the leftmost node | ||
423 | * of the right subtree. | ||
424 | */ | ||
425 | for (tmp = nodep->right; tmp->left; tmp = tmp->left) | ||
426 | ; | ||
427 | tmp->left = nodep->left; | ||
428 | nodep->left = NULL; | ||
429 | tmp->left->parent = tmp; | ||
430 | } | ||
431 | |||
432 | /* Left only child */ | ||
433 | if (nodep->left) { | ||
434 | if (!nodep->parent) { | ||
435 | s->root = nodep->left; | ||
436 | nodep->left->parent = NULL; | ||
437 | } else { | ||
438 | nodep->left->parent = nodep->parent; | ||
439 | if (nodep == nodep->parent->left) | ||
440 | nodep->parent->left = nodep->left; | ||
441 | else { | ||
442 | assert(nodep == nodep->parent->right); | ||
443 | nodep->parent->right = nodep->left; | ||
444 | } | ||
445 | } | ||
446 | |||
447 | nodep->parent = nodep->left = nodep->right = NULL; | ||
448 | free(nodep); | ||
449 | |||
450 | return; | ||
451 | } | ||
452 | |||
453 | |||
454 | /* Right only child */ | ||
455 | if (nodep->right) { | ||
456 | if (!nodep->parent) { | ||
457 | s->root = nodep->right; | ||
458 | nodep->right->parent = NULL; | ||
459 | } else { | ||
460 | nodep->right->parent = nodep->parent; | ||
461 | if (nodep == nodep->parent->left) | ||
462 | nodep->parent->left = nodep->right; | ||
463 | else { | ||
464 | assert(nodep == nodep->parent->right); | ||
465 | nodep->parent->right = nodep->right; | ||
466 | } | ||
467 | } | ||
468 | |||
469 | nodep->parent = nodep->left = nodep->right = NULL; | ||
470 | free(nodep); | ||
471 | |||
472 | return; | ||
473 | } | ||
474 | |||
475 | /* Leaf Node */ | ||
476 | if (!nodep->parent) { | ||
477 | s->root = NULL; | ||
478 | } else { | ||
479 | if (nodep->parent->left == nodep) | ||
480 | nodep->parent->left = NULL; | ||
481 | else { | ||
482 | assert(nodep == nodep->parent->right); | ||
483 | nodep->parent->right = NULL; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | nodep->parent = nodep->left = nodep->right = NULL; | ||
488 | free(nodep); | ||
489 | |||
490 | return; | ||
491 | } | ||
492 | |||
493 | /* Splits the node containing the bit at idx so that there is a node | ||
494 | * that starts at the specified index. If no such node exists, a new | ||
495 | * node at the specified index is created. Returns the new node. | ||
496 | * | ||
497 | * idx must be aligned to a mask boundary. | ||
498 | */ | ||
499 | static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) | ||
500 | { | ||
501 | struct node *nodep1, *nodep2; | ||
502 | sparsebit_idx_t offset; | ||
503 | sparsebit_num_t orig_num_after; | ||
504 | |||
505 | assert(!(idx % MASK_BITS)); | ||
506 | |||
507 | /* | ||
508 | * Is there a node that describes the setting of idx? | ||
509 | * If not, add it. | ||
510 | */ | ||
511 | nodep1 = node_find(s, idx); | ||
512 | if (!nodep1) | ||
513 | return node_add(s, idx); | ||
514 | |||
515 | /* | ||
516 | * All done if the starting index of the node is where the | ||
517 | * split should occur. | ||
518 | */ | ||
519 | if (nodep1->idx == idx) | ||
520 | return nodep1; | ||
521 | |||
522 | /* | ||
523 | * Split point not at start of mask, so it must be part of | ||
524 | * bits described by num_after. | ||
525 | */ | ||
526 | |||
527 | /* | ||
528 | * Calculate offset within num_after for where the split is | ||
529 | * to occur. | ||
530 | */ | ||
531 | offset = idx - (nodep1->idx + MASK_BITS); | ||
532 | orig_num_after = nodep1->num_after; | ||
533 | |||
534 | /* | ||
535 | * Add a new node to describe the bits starting at | ||
536 | * the split point. | ||
537 | */ | ||
538 | nodep1->num_after = offset; | ||
539 | nodep2 = node_add(s, idx); | ||
540 | |||
541 | /* Move bits after the split point into the new node */ | ||
542 | nodep2->num_after = orig_num_after - offset; | ||
543 | if (nodep2->num_after >= MASK_BITS) { | ||
544 | nodep2->mask = ~(mask_t) 0; | ||
545 | nodep2->num_after -= MASK_BITS; | ||
546 | } else { | ||
547 | nodep2->mask = ((mask_t) 1 << nodep2->num_after) - 1; | ||
548 | nodep2->num_after = 0; | ||
549 | } | ||
550 | |||
551 | return nodep2; | ||
552 | } | ||
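A worked example, with hypothetical contents: splitting a node that covers bits 0..127 at index 64 (i.e. node_split(s, 64)) yields:

	before: node A - idx: 0x0  mask: 0xffffffff num_after: 96
	after:  node A - idx: 0x0  mask: 0xffffffff num_after: 32
	        node B - idx: 0x40 mask: 0xffffffff num_after: 32

Node A keeps the bits below the split point (0..63), while the new node B takes over bits 64..127; no bit settings change, only how they are represented.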
553 | |||
554 | /* Iteratively reduces the node pointed to by nodep and its adjacent | ||
555 | * nodes into a more compact form. For example, a node with a mask with | ||
556 | * all bits set adjacent to a previous node, will get combined into a | ||
557 | * single node with an increased num_after setting. | ||
558 | * | ||
559 | * After each reduction, a further check is made to see if additional | ||
560 | * reductions are possible with the new previous and next nodes. Note, | ||
561 | * a search for a reduction is only done across the nodes nearest nodep | ||
562 | * and those that became part of a reduction. Reductions beyond nodep | ||
563 | * and the adjacent nodes that are reduced are not discovered. It is the | ||
564 | * responsibility of the caller to pass a nodep that is within one node | ||
565 | * of each possible reduction. | ||
566 | * | ||
567 | * This function does not fix the temporary violation of all invariants. | ||
568 | * For example it does not fix the case where the bit settings described | ||
569 | * by two or more nodes overlap. Such a violation introduces the potential | ||
570 | * complication of a bit setting for a specific index having different settings | ||
571 | * in different nodes. This would then introduce the further complication | ||
572 | * of which node has the correct setting of the bit and thus such conditions | ||
573 | * are not allowed. | ||
574 | * | ||
575 | * This function is designed to fix invariant violations that are introduced | ||
576 | * by node_split() and by changes to the nodes mask or num_after members. | ||
577 | * For example, when setting a bit within a node's mask, the function that | ||
578 | * sets the bit doesn't have to worry about whether the setting of that | ||
579 | * bit caused the mask to have leading only or trailing only bits set. | ||
580 | * Instead, the function can call node_reduce(), with nodep equal to the | ||
581 | * node address that it set a mask bit in, and node_reduce() will notice | ||
582 | * the cases of leading or trailing only bits and that there is an | ||
583 | * adjacent node that the bit settings could be merged into. | ||
584 | * | ||
585 | * This implementation specifically detects and corrects violation of the | ||
586 | * following invariants: | ||
587 | * | ||
588 | * + Nodes are only used to represent bits that are set. | ||
589 | * Nodes with a mask of 0 and num_after of 0 are not allowed. | ||
590 | * | ||
591 | * + The setting of at least one bit is always described in a node's | ||
592 | * mask (mask >= 1). | ||
593 | * | ||
594 | * + A node with all mask bits set only occurs when the last bit | ||
595 | * described by the previous node is not equal to this node's | ||
596 | * starting index - 1. All such occurrences of this condition are | ||
597 | * avoided by moving the setting of the node's mask bits into | ||
598 | * the previous node's num_after setting. | ||
599 | */ | ||
600 | static void node_reduce(struct sparsebit *s, struct node *nodep) | ||
601 | { | ||
602 | bool reduction_performed; | ||
603 | |||
604 | do { | ||
605 | reduction_performed = false; | ||
606 | struct node *prev, *next, *tmp; | ||
607 | |||
608 | /* 1) Potential reductions within the current node. */ | ||
609 | |||
610 | /* Nodes with all bits cleared may be removed. */ | ||
611 | if (nodep->mask == 0 && nodep->num_after == 0) { | ||
612 | /* | ||
613 | * About to remove the node pointed to by | ||
614 | * nodep, which normally would cause a problem | ||
615 | * for the next pass through the reduction loop, | ||
616 | * because the node at the starting point no longer | ||
617 | * exists. This potential problem is handled | ||
618 | * by first remembering the location of the next | ||
619 | * or previous nodes. Doesn't matter which, because | ||
620 | * once the node at nodep is removed, there will be | ||
621 | * no other nodes between prev and next. | ||
622 | * | ||
623 | * Note, the checks performed on nodep against | ||
624 | * prev and next both check for an adjacent | ||
625 | * node that can be reduced into a single node. As | ||
626 | * such, after removing the node at nodep, it doesn't | ||
627 | * matter whether the nodep for the next pass | ||
628 | * through the loop is equal to the previous pass | ||
629 | * prev or next node. Either way, on the next pass | ||
630 | * the one not selected will become either the | ||
631 | * prev or next node. | ||
632 | */ | ||
633 | tmp = node_next(s, nodep); | ||
634 | if (!tmp) | ||
635 | tmp = node_prev(s, nodep); | ||
636 | |||
637 | node_rm(s, nodep); | ||
638 | nodep = NULL; | ||
639 | |||
640 | nodep = tmp; | ||
641 | reduction_performed = true; | ||
642 | continue; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * When the mask is 0, can reduce the amount of num_after | ||
647 | * bits by moving the initial num_after bits into the mask. | ||
648 | */ | ||
649 | if (nodep->mask == 0) { | ||
650 | assert(nodep->num_after != 0); | ||
651 | assert(nodep->idx + MASK_BITS > nodep->idx); | ||
652 | |||
653 | nodep->idx += MASK_BITS; | ||
654 | |||
655 | if (nodep->num_after >= MASK_BITS) { | ||
656 | nodep->mask = ~(mask_t) 0; | ||
657 | nodep->num_after -= MASK_BITS; | ||
658 | } else { | ||
659 | nodep->mask = (1u << nodep->num_after) - 1; | ||
660 | nodep->num_after = 0; | ||
661 | } | ||
662 | |||
663 | reduction_performed = true; | ||
664 | continue; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * 2) Potential reductions between the current and | ||
669 | * previous nodes. | ||
670 | */ | ||
671 | prev = node_prev(s, nodep); | ||
672 | if (prev) { | ||
673 | sparsebit_idx_t prev_highest_bit; | ||
674 | |||
675 | /* Nodes with no bits set can be removed. */ | ||
676 | if (prev->mask == 0 && prev->num_after == 0) { | ||
677 | node_rm(s, prev); | ||
678 | |||
679 | reduction_performed = true; | ||
680 | continue; | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * All mask bits set and previous node has | ||
685 | * adjacent index. | ||
686 | */ | ||
687 | if (nodep->mask + 1 == 0 && | ||
688 | prev->idx + MASK_BITS == nodep->idx) { | ||
689 | prev->num_after += MASK_BITS + nodep->num_after; | ||
690 | nodep->mask = 0; | ||
691 | nodep->num_after = 0; | ||
692 | |||
693 | reduction_performed = true; | ||
694 | continue; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Is node adjacent to previous node and the node | ||
699 | * contains a single contiguous range of bits | ||
700 | * starting from the beginning of the mask? | ||
701 | */ | ||
702 | prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after; | ||
703 | if (prev_highest_bit + 1 == nodep->idx && | ||
704 | (nodep->mask | (nodep->mask >> 1)) == nodep->mask) { | ||
705 | /* | ||
706 | * How many contiguous bits are there? | ||
707 | * Is equal to the total number of set | ||
708 | * bits, due to an earlier check that | ||
709 | * there is a single contiguous range of | ||
710 | * set bits. | ||
711 | */ | ||
712 | unsigned int num_contiguous | ||
713 | = __builtin_popcount(nodep->mask); | ||
714 | assert((num_contiguous > 0) && | ||
715 | ((1ULL << num_contiguous) - 1) == nodep->mask); | ||
716 | |||
717 | prev->num_after += num_contiguous; | ||
718 | nodep->mask = 0; | ||
719 | |||
720 | /* | ||
721 | * For predictable performance, handle special | ||
722 | * case where all mask bits are set and there | ||
723 | * is a non-zero num_after setting. This code | ||
724 | * is functionally correct without the following | ||
725 | * conditionalized statements, but without them | ||
726 | * the value of num_after is only reduced by | ||
727 | * the number of mask bits per pass. There are | ||
728 | * cases where num_after can be close to 2^64. | ||
729 | * Without this code it could take nearly | ||
730 | * (2^64) / 32 passes to perform the full | ||
731 | * reduction. | ||
732 | */ | ||
733 | if (num_contiguous == MASK_BITS) { | ||
734 | prev->num_after += nodep->num_after; | ||
735 | nodep->num_after = 0; | ||
736 | } | ||
737 | |||
738 | reduction_performed = true; | ||
739 | continue; | ||
740 | } | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * 3) Potential reductions between the current and | ||
745 | * next nodes. | ||
746 | */ | ||
747 | next = node_next(s, nodep); | ||
748 | if (next) { | ||
749 | /* Nodes with no bits set can be removed. */ | ||
750 | if (next->mask == 0 && next->num_after == 0) { | ||
751 | node_rm(s, next); | ||
752 | reduction_performed = true; | ||
753 | continue; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * Is next node index adjacent to current node | ||
758 | * and has a mask with all bits set? | ||
759 | */ | ||
760 | if (next->idx == nodep->idx + MASK_BITS + nodep->num_after && | ||
761 | next->mask == ~(mask_t) 0) { | ||
762 | nodep->num_after += MASK_BITS; | ||
763 | next->mask = 0; | ||
764 | nodep->num_after += next->num_after; | ||
765 | next->num_after = 0; | ||
766 | |||
767 | node_rm(s, next); | ||
768 | next = NULL; | ||
769 | |||
770 | reduction_performed = true; | ||
771 | continue; | ||
772 | } | ||
773 | } | ||
774 | } while (nodep && reduction_performed); | ||
775 | } | ||
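As a worked example of case 2, with hypothetical contents, merging a fully populated node into an adjacent predecessor:

	before: prev  - idx: 0x0  mask: 0xffffffff num_after: 0
	        nodep - idx: 0x20 mask: 0xffffffff num_after: 10
	after:  prev  - idx: 0x0  mask: 0xffffffff num_after: 42

nodep is left with mask == 0 and num_after == 0, so the next pass of the loop removes it under case 1.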
776 | |||
777 | /* Returns whether the bit at the index given by idx, within the | ||
778 | * sparsebit array is set or not. | ||
779 | */ | ||
780 | bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) | ||
781 | { | ||
782 | struct node *nodep; | ||
783 | |||
784 | /* Find the node that describes the setting of the bit at idx */ | ||
785 | for (nodep = s->root; nodep; | ||
786 | nodep = nodep->idx > idx ? nodep->left : nodep->right) | ||
787 | if (idx >= nodep->idx && | ||
788 | idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) | ||
789 | goto have_node; | ||
790 | |||
791 | return false; | ||
792 | |||
793 | have_node: | ||
794 | /* Bit is set if it is any of the bits described by num_after */ | ||
795 | if (nodep->num_after && idx >= nodep->idx + MASK_BITS) | ||
796 | return true; | ||
797 | |||
798 | /* Is the corresponding mask bit set */ | ||
799 | assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS); | ||
800 | return !!(nodep->mask & (1 << (idx - nodep->idx))); | ||
801 | } | ||
802 | |||
803 | /* Within the sparsebit array pointed to by s, sets the bit | ||
804 | * at the index given by idx. | ||
805 | */ | ||
806 | static void bit_set(struct sparsebit *s, sparsebit_idx_t idx) | ||
807 | { | ||
808 | struct node *nodep; | ||
809 | |||
810 | /* Skip bits that are already set */ | ||
811 | if (sparsebit_is_set(s, idx)) | ||
812 | return; | ||
813 | |||
814 | /* | ||
815 | * Get a node where the bit at idx is described by the mask. | ||
816 | * node_split() will also create a node, if there isn't | ||
817 | * already a node that describes the setting of the bit. | ||
818 | */ | ||
819 | nodep = node_split(s, idx & -MASK_BITS); | ||
820 | |||
821 | /* Set the bit within the nodes mask */ | ||
822 | assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); | ||
823 | assert(!(nodep->mask & (1 << (idx - nodep->idx)))); | ||
824 | nodep->mask |= 1 << (idx - nodep->idx); | ||
825 | s->num_set++; | ||
826 | |||
827 | node_reduce(s, nodep); | ||
828 | } | ||
829 | |||
830 | /* Within the sparsebit array pointed to by s, clears the bit | ||
831 | * at the index given by idx. | ||
832 | */ | ||
833 | static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx) | ||
834 | { | ||
835 | struct node *nodep; | ||
836 | |||
837 | /* Skip bits that are already cleared */ | ||
838 | if (!sparsebit_is_set(s, idx)) | ||
839 | return; | ||
840 | |||
841 | /* Is there a node that describes the setting of this bit? */ | ||
842 | nodep = node_find(s, idx); | ||
843 | if (!nodep) | ||
844 | return; | ||
845 | |||
846 | /* | ||
847 | * If a num_after bit, split the node, so that the bit is | ||
848 | * part of a node mask. | ||
849 | */ | ||
850 | if (idx >= nodep->idx + MASK_BITS) | ||
851 | nodep = node_split(s, idx & -MASK_BITS); | ||
852 | |||
853 | /* | ||
854 | * After node_split above, bit at idx should be within the mask. | ||
855 | * Clear that bit. | ||
856 | */ | ||
857 | assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); | ||
858 | assert(nodep->mask & (1 << (idx - nodep->idx))); | ||
859 | nodep->mask &= ~(1 << (idx - nodep->idx)); | ||
860 | assert(s->num_set > 0 || sparsebit_all_set(s)); | ||
861 | s->num_set--; | ||
862 | |||
863 | node_reduce(s, nodep); | ||
864 | } | ||
865 | |||
866 | /* Recursively dumps to the FILE stream given by stream the contents | ||
867 | * of the sub-tree of nodes pointed to by nodep. Each line of output | ||
868 | * is prefixed by the number of spaces given by indent. On each | ||
869 | * recursion, the indent amount is increased by 2. This causes nodes | ||
870 | * at each level deeper into the binary search tree to be displayed | ||
871 | * with a greater indent. | ||
872 | */ | ||
873 | static void dump_nodes(FILE *stream, struct node *nodep, | ||
874 | unsigned int indent) | ||
875 | { | ||
876 | char *node_type; | ||
877 | |||
878 | /* Dump contents of node */ | ||
879 | if (!nodep->parent) | ||
880 | node_type = "root"; | ||
881 | else if (nodep == nodep->parent->left) | ||
882 | node_type = "left"; | ||
883 | else { | ||
884 | assert(nodep == nodep->parent->right); | ||
885 | node_type = "right"; | ||
886 | } | ||
887 | fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep); | ||
888 | fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "", | ||
889 | nodep->parent, nodep->left, nodep->right); | ||
890 | fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n", | ||
891 | indent, "", nodep->idx, nodep->mask, nodep->num_after); | ||
892 | |||
893 | /* If present, dump contents of left child nodes */ | ||
894 | if (nodep->left) | ||
895 | dump_nodes(stream, nodep->left, indent + 2); | ||
896 | |||
897 | /* If present, dump contents of right child nodes */ | ||
898 | if (nodep->right) | ||
899 | dump_nodes(stream, nodep->right, indent + 2); | ||
900 | } | ||
901 | |||
902 | static inline sparsebit_idx_t node_first_set(struct node *nodep, int start) | ||
903 | { | ||
904 | mask_t leading = (mask_t)1 << start; | ||
905 | int n1 = __builtin_ctz(nodep->mask & -leading); | ||
906 | |||
907 | return nodep->idx + n1; | ||
908 | } | ||
909 | |||
910 | static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) | ||
911 | { | ||
912 | mask_t leading = (mask_t)1 << start; | ||
913 | int n1 = __builtin_ctz(~nodep->mask & -leading); | ||
914 | |||
915 | return nodep->idx + n1; | ||
916 | } | ||
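Both helpers rely on the same two's-complement identity: -leading has all bits at and above start set, so ANDing isolates the eligible bits and __builtin_ctz() picks the lowest one. For example, with mask == 0x24 (bits 2 and 5 set) and start == 3: leading == 0x8, -leading == 0xfffffff8, mask & -leading == 0x20, and ctz returns 5. Both helpers assume such a bit exists at or above start (callers check this first), since __builtin_ctz(0) is undefined.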
917 | |||
918 | /* Dumps to the FILE stream specified by stream, the implementation dependent | ||
919 | * internal state of s. Each line of output is prefixed with the number | ||
920 | * of spaces given by indent. The output is completely implementation | ||
921 | * dependent and subject to change. Output from this function should only | ||
922 | * be used for diagnostic purposes. For example, this function can be | ||
923 | * used by test cases after they detect an unexpected condition, as a means | ||
924 | * to capture diagnostic information. | ||
925 | */ | ||
926 | static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, | ||
927 | unsigned int indent) | ||
928 | { | ||
929 | /* Dump the contents of s */ | ||
930 | fprintf(stream, "%*sroot: %p\n", indent, "", s->root); | ||
931 | fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set); | ||
932 | |||
933 | if (s->root) | ||
934 | dump_nodes(stream, s->root, indent); | ||
935 | } | ||
936 | |||
937 | /* Allocates and returns a new sparsebit array. The initial state | ||
938 | * of the newly allocated sparsebit array has all bits cleared. | ||
939 | */ | ||
940 | struct sparsebit *sparsebit_alloc(void) | ||
941 | { | ||
942 | struct sparsebit *s; | ||
943 | |||
944 | /* Allocate top level structure. */ | ||
945 | s = calloc(1, sizeof(*s)); | ||
946 | if (!s) { | ||
947 | perror("calloc"); | ||
948 | abort(); | ||
949 | } | ||
950 | |||
951 | return s; | ||
952 | } | ||
953 | |||
954 | /* Frees the implementation dependent data for the sparsebit array | ||
955 | * pointed to by s and poisons the pointer to that data. | ||
956 | */ | ||
957 | void sparsebit_free(struct sparsebit **sbitp) | ||
958 | { | ||
959 | struct sparsebit *s = *sbitp; | ||
960 | |||
961 | if (!s) | ||
962 | return; | ||
963 | |||
964 | sparsebit_clear_all(s); | ||
965 | free(s); | ||
966 | *sbitp = NULL; | ||
967 | } | ||
968 | |||
969 | /* Makes a copy of the sparsebit array given by s, to the sparsebit | ||
970 | * array given by d. Note, d must have already been allocated via | ||
971 | * sparsebit_alloc(). It may already have bits set; any existing | ||
972 | * bit settings in d are cleared before the copy. | ||
973 | */ | ||
974 | void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) | ||
975 | { | ||
976 | /* First clear any bits already set in the destination */ | ||
977 | sparsebit_clear_all(d); | ||
978 | |||
979 | if (s->root) { | ||
980 | d->root = node_copy_subtree(s->root); | ||
981 | d->num_set = s->num_set; | ||
982 | } | ||
983 | } | ||
984 | |||
985 | /* Returns whether num consecutive bits starting at idx are all set. */ | ||
986 | bool sparsebit_is_set_num(struct sparsebit *s, | ||
987 | sparsebit_idx_t idx, sparsebit_num_t num) | ||
988 | { | ||
989 | sparsebit_idx_t next_cleared; | ||
990 | |||
991 | assert(num > 0); | ||
992 | assert(idx + num - 1 >= idx); | ||
993 | |||
994 | /* With num > 0, the first bit must be set. */ | ||
995 | if (!sparsebit_is_set(s, idx)) | ||
996 | return false; | ||
997 | |||
998 | /* Find the next cleared bit */ | ||
999 | next_cleared = sparsebit_next_clear(s, idx); | ||
1000 | |||
1001 | /* | ||
1002 | * If no cleared bits beyond idx, then there are at least num | ||
1003 | * set bits. idx + num doesn't wrap. Otherwise check if | ||
1004 | * there are enough set bits between idx and the next cleared bit. | ||
1005 | */ | ||
1006 | return next_cleared == 0 || next_cleared - idx >= num; | ||
1007 | } | ||
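For example, with only bits 5..9 set, sparsebit_is_set_num(s, 5, 5) finds next_cleared == 10 and returns true because 10 - 5 >= 5, whereas sparsebit_is_set_num(s, 5, 6) returns false because 10 - 5 < 6.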
1008 | |||
1009 | /* Returns whether the bit at the index given by idx is cleared. */ | ||
1010 | bool sparsebit_is_clear(struct sparsebit *s, | ||
1011 | sparsebit_idx_t idx) | ||
1012 | { | ||
1013 | return !sparsebit_is_set(s, idx); | ||
1014 | } | ||
1015 | |||
1016 | /* Returns whether num consecutive bits starting at idx are all cleared. */ | ||
1017 | bool sparsebit_is_clear_num(struct sparsebit *s, | ||
1018 | sparsebit_idx_t idx, sparsebit_num_t num) | ||
1019 | { | ||
1020 | sparsebit_idx_t next_set; | ||
1021 | |||
1022 | assert(num > 0); | ||
1023 | assert(idx + num - 1 >= idx); | ||
1024 | |||
1025 | /* With num > 0, the first bit must be cleared. */ | ||
1026 | if (!sparsebit_is_clear(s, idx)) | ||
1027 | return false; | ||
1028 | |||
1029 | /* Find the next set bit */ | ||
1030 | next_set = sparsebit_next_set(s, idx); | ||
1031 | |||
1032 | /* | ||
1033 | * If no set bits beyond idx, then there are at least num | ||
1034 | * cleared bits. idx + num doesn't wrap. Otherwise check if | ||
1035 | * there are enough cleared bits between idx and the next set bit. | ||
1036 | */ | ||
1037 | return next_set == 0 || next_set - idx >= num; | ||
1038 | } | ||
1039 | |||
1040 | /* Returns the total number of bits set. Note: 0 is also returned for | ||
1041 | * the case of all bits set. This is because with all bits set, there | ||
1042 | * is 1 additional bit set beyond what can be represented in the return | ||
1043 | * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, | ||
1044 | * to determine if the sparsebit array has any bits set. | ||
1045 | */ | ||
1046 | sparsebit_num_t sparsebit_num_set(struct sparsebit *s) | ||
1047 | { | ||
1048 | return s->num_set; | ||
1049 | } | ||
1050 | |||
1051 | /* Returns whether any bit is set in the sparsebit array. */ | ||
1052 | bool sparsebit_any_set(struct sparsebit *s) | ||
1053 | { | ||
1054 | /* | ||
1055 | * Nodes only describe set bits. If any nodes exist, there | ||
1056 | * is at least 1 bit set. | ||
1057 | */ | ||
1058 | if (!s->root) | ||
1059 | return false; | ||
1060 | |||
1061 | /* | ||
1062 | * Every node should have a non-zero mask. For now, just | ||
1063 | * assure that the root node has a non-zero mask, | ||
1064 | * which is a quick check that at least 1 bit is set. | ||
1065 | */ | ||
1066 | assert(s->root->mask != 0); | ||
1067 | assert(s->num_set > 0 || | ||
1068 | (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS && | ||
1069 | s->root->mask == ~(mask_t) 0)); | ||
1070 | |||
1071 | return true; | ||
1072 | } | ||
1073 | |||
1074 | /* Returns whether all the bits in the sparsebit array are cleared. */ | ||
1075 | bool sparsebit_all_clear(struct sparsebit *s) | ||
1076 | { | ||
1077 | return !sparsebit_any_set(s); | ||
1078 | } | ||
1079 | |||
1080 | /* Returns whether all the bits in the sparsebit array are set. */ | ||
1081 | bool sparsebit_any_clear(struct sparsebit *s) | ||
1082 | { | ||
1083 | return !sparsebit_all_set(s); | ||
1084 | } | ||
1085 | |||
1086 | /* Returns the index of the first set bit. Abort if no bits are set. | ||
1087 | */ | ||
1088 | sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) | ||
1089 | { | ||
1090 | struct node *nodep; | ||
1091 | |||
1092 | /* Validate at least 1 bit is set */ | ||
1093 | assert(sparsebit_any_set(s)); | ||
1094 | |||
1095 | nodep = node_first(s); | ||
1096 | return node_first_set(nodep, 0); | ||
1097 | } | ||
1098 | |||
1099 | /* Returns the index of the first cleared bit. Abort if | ||
1100 | * no bits are cleared. | ||
1101 | */ | ||
1102 | sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) | ||
1103 | { | ||
1104 | struct node *nodep1, *nodep2; | ||
1105 | |||
1106 | /* Validate at least 1 bit is cleared. */ | ||
1107 | assert(sparsebit_any_clear(s)); | ||
1108 | |||
1109 | /* If no nodes or first node index > 0 then lowest cleared is 0 */ | ||
1110 | nodep1 = node_first(s); | ||
1111 | if (!nodep1 || nodep1->idx > 0) | ||
1112 | return 0; | ||
1113 | |||
1114 | /* Does the mask in the first node contain any cleared bits? */ | ||
1115 | if (nodep1->mask != ~(mask_t) 0) | ||
1116 | return node_first_clear(nodep1, 0); | ||
1117 | |||
1118 | /* | ||
1119 | * All mask bits set in first node. If there isn't a second node | ||
1120 | * then the first cleared bit is the first bit after the bits | ||
1121 | * described by the first node. | ||
1122 | */ | ||
1123 | nodep2 = node_next(s, nodep1); | ||
1124 | if (!nodep2) { | ||
1125 | /* | ||
1126 | * No second node. First cleared bit is first bit beyond | ||
1127 | * bits described by first node. | ||
1128 | */ | ||
1129 | assert(nodep1->mask == ~(mask_t) 0); | ||
1130 | assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0); | ||
1131 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
1132 | } | ||
1133 | |||
1134 | /* | ||
1135 | * There is a second node. | ||
1136 | * If it is not adjacent to the first node, then there is a gap | ||
1137 | * of cleared bits between the nodes, and the first cleared bit | ||
1138 | * is the first bit within the gap. | ||
1139 | */ | ||
1140 | if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) | ||
1141 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
1142 | |||
1143 | /* | ||
1144 | * Second node is adjacent to the first node. | ||
1145 | * Because it is adjacent, its mask should be non-zero. If all | ||
1146 | * its mask bits are set, then with it being adjacent, it should | ||
1147 | * have had the mask bits moved into the num_after setting of the | ||
1148 | * previous node. | ||
1149 | */ | ||
1150 | return node_first_clear(nodep2, 0); | ||
1151 | } | ||
1152 | |||
1153 | /* Returns index of next bit set within s after the index given by prev. | ||
1154 | * Returns 0 if there are no bits after prev that are set. | ||
1155 | */ | ||
1156 | sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, | ||
1157 | sparsebit_idx_t prev) | ||
1158 | { | ||
1159 | sparsebit_idx_t lowest_possible = prev + 1; | ||
1160 | sparsebit_idx_t start; | ||
1161 | struct node *nodep; | ||
1162 | |||
1163 | /* A bit after the highest index can't be set. */ | ||
1164 | if (lowest_possible == 0) | ||
1165 | return 0; | ||
1166 | |||
1167 | /* | ||
1168 | * Find the leftmost 'candidate' overlapping or to the right | ||
1169 | * of lowest_possible. | ||
1170 | */ | ||
1171 | struct node *candidate = NULL; | ||
1172 | |||
1173 | /* True iff lowest_possible is within candidate */ | ||
1174 | bool contains = false; | ||
1175 | |||
1176 | /* | ||
1177 | * Find node that describes setting of bit at lowest_possible. | ||
1178 | * If such a node doesn't exist, find the node with the lowest | ||
1179 | * starting index that is > lowest_possible. | ||
1180 | */ | ||
1181 | for (nodep = s->root; nodep;) { | ||
1182 | if ((nodep->idx + MASK_BITS + nodep->num_after - 1) | ||
1183 | >= lowest_possible) { | ||
1184 | candidate = nodep; | ||
1185 | if (candidate->idx <= lowest_possible) { | ||
1186 | contains = true; | ||
1187 | break; | ||
1188 | } | ||
1189 | nodep = nodep->left; | ||
1190 | } else { | ||
1191 | nodep = nodep->right; | ||
1192 | } | ||
1193 | } | ||
1194 | if (!candidate) | ||
1195 | return 0; | ||
1196 | |||
1197 | assert(candidate->mask != 0); | ||
1198 | |||
1199 | /* Does the candidate node describe the setting of lowest_possible? */ | ||
1200 | if (!contains) { | ||
1201 | /* | ||
1202 | * Candidate doesn't describe setting of bit at lowest_possible. | ||
1203 | * Candidate points to the first node with a starting index | ||
1204 | * > lowest_possible. | ||
1205 | */ | ||
1206 | assert(candidate->idx > lowest_possible); | ||
1207 | |||
1208 | return node_first_set(candidate, 0); | ||
1209 | } | ||
1210 | |||
1211 | /* | ||
1212 | * Candidate describes setting of bit at lowest_possible. | ||
1213 | * Note: although the node describes the setting of the bit | ||
1214 | * at lowest_possible, it's possible that its setting and the | ||
1215 | * settings of all later bits described by this node are 0. | ||
1216 | * For now, just handle the cases where this node describes | ||
1217 | * a bit at or after an index of lowest_possible that is set. | ||
1218 | */ | ||
1219 | start = lowest_possible - candidate->idx; | ||
1220 | |||
1221 | if (start < MASK_BITS && candidate->mask >= ((mask_t) 1 << start)) | ||
1222 | return node_first_set(candidate, start); | ||
1223 | |||
1224 | if (candidate->num_after) { | ||
1225 | sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS; | ||
1226 | |||
1227 | return lowest_possible < first_num_after_idx | ||
1228 | ? first_num_after_idx : lowest_possible; | ||
1229 | } | ||
1230 | |||
1231 | /* | ||
1232 | * Although candidate node describes setting of bit at | ||
1233 | * the index of lowest_possible, all bits at that index and | ||
1234 | * later that are described by candidate are cleared. With | ||
1235 | * this, the next bit is the first bit in the next node, if | ||
1236 | * such a node exists. If a next node doesn't exist, then | ||
1237 | * there is no next set bit. | ||
1238 | */ | ||
1239 | candidate = node_next(s, candidate); | ||
1240 | if (!candidate) | ||
1241 | return 0; | ||
1242 | |||
1243 | return node_first_set(candidate, 0); | ||
1244 | } | ||
1245 | |||
1246 | /* Returns index of next bit cleared within s after the index given by prev. | ||
1247 | * Returns 0 if there are no bits after prev that are cleared. | ||
1248 | */ | ||
1249 | sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, | ||
1250 | sparsebit_idx_t prev) | ||
1251 | { | ||
1252 | sparsebit_idx_t lowest_possible = prev + 1; | ||
1253 | sparsebit_idx_t idx; | ||
1254 | struct node *nodep1, *nodep2; | ||
1255 | |||
1256 | /* There are no bits after the highest index. */ | ||
1257 | if (lowest_possible == 0) | ||
1258 | return 0; | ||
1259 | |||
1260 | /* | ||
1261 | * Does a node describing the setting of lowest_possible exist? | ||
1262 | * If not, the bit at lowest_possible is cleared. | ||
1263 | */ | ||
1264 | nodep1 = node_find(s, lowest_possible); | ||
1265 | if (!nodep1) | ||
1266 | return lowest_possible; | ||
1267 | |||
1268 | /* Does a mask bit in node 1 describe the next cleared bit? */ | ||
1269 | for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++) | ||
1270 | if (!(nodep1->mask & (1 << idx))) | ||
1271 | return nodep1->idx + idx; | ||
1272 | |||
1273 | /* | ||
1274 | * Next cleared bit is not described by node 1. If there | ||
1275 | * isn't a next node, then next cleared bit is described | ||
1276 | * by bit after the bits described by the first node. | ||
1277 | */ | ||
1278 | nodep2 = node_next(s, nodep1); | ||
1279 | if (!nodep2) | ||
1280 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
1281 | |||
1282 | /* | ||
1283 | * There is a second node. | ||
1284 | * If it is not adjacent to the first node, then there is a gap | ||
1285 | * of cleared bits between the nodes, and the next cleared bit | ||
1286 | * is the first bit within the gap. | ||
1287 | */ | ||
1288 | if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) | ||
1289 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
1290 | |||
1291 | /* | ||
1292 | * Second node is adjacent to the first node. | ||
1293 | * Because it is adjacent, its mask should be non-zero. If all | ||
1294 | * its mask bits are set, then with it being adjacent, it should | ||
1295 | * have had the mask bits moved into the num_after setting of the | ||
1296 | * previous node. | ||
1297 | */ | ||
1298 | return node_first_clear(nodep2, 0); | ||
1299 | } | ||
1300 | |||
1301 | /* Starting with the index 1 greater than the index given by start, finds | ||
1302 | * and returns the index of the first sequence of num consecutively set | ||
1303 | * bits. Returns a value of 0 if no such sequence exists. | ||
1304 | */ | ||
1305 | sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, | ||
1306 | sparsebit_idx_t start, sparsebit_num_t num) | ||
1307 | { | ||
1308 | sparsebit_idx_t idx; | ||
1309 | |||
1310 | assert(num >= 1); | ||
1311 | |||
1312 | for (idx = sparsebit_next_set(s, start); | ||
1313 | idx != 0 && idx + num - 1 >= idx; | ||
1314 | idx = sparsebit_next_set(s, idx)) { | ||
1315 | assert(sparsebit_is_set(s, idx)); | ||
1316 | |||
1317 | /* | ||
1318 | * Does the sequence of bits starting at idx consist of | ||
1319 | * num set bits? | ||
1320 | */ | ||
1321 | if (sparsebit_is_set_num(s, idx, num)) | ||
1322 | return idx; | ||
1323 | |||
1324 | /* | ||
1325 | * Sequence of set bits at idx isn't large enough. | ||
1326 | * Skip this entire sequence of set bits. | ||
1327 | */ | ||
1328 | idx = sparsebit_next_clear(s, idx); | ||
1329 | if (idx == 0) | ||
1330 | return 0; | ||
1331 | } | ||
1332 | |||
1333 | return 0; | ||
1334 | } | ||
1335 | |||
1336 | /* Starting with the index 1 greater than the index given by start, finds | ||
1337 | * and returns the index of the first sequence of num consecutively cleared | ||
1338 | * bits. Returns a value of 0 if no such sequence exists. | ||
1339 | */ | ||
1340 | sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, | ||
1341 | sparsebit_idx_t start, sparsebit_num_t num) | ||
1342 | { | ||
1343 | sparsebit_idx_t idx; | ||
1344 | |||
1345 | assert(num >= 1); | ||
1346 | |||
1347 | for (idx = sparsebit_next_clear(s, start); | ||
1348 | idx != 0 && idx + num - 1 >= idx; | ||
1349 | idx = sparsebit_next_clear(s, idx)) { | ||
1350 | assert(sparsebit_is_clear(s, idx)); | ||
1351 | |||
1352 | /* | ||
1353 | * Does the sequence of bits starting at idx consist of | ||
1354 | * num cleared bits? | ||
1355 | */ | ||
1356 | if (sparsebit_is_clear_num(s, idx, num)) | ||
1357 | return idx; | ||
1358 | |||
1359 | /* | ||
1360 | * Sequence of cleared bits at idx isn't large enough. | ||
1361 | * Skip this entire sequence of cleared bits. | ||
1362 | */ | ||
1363 | idx = sparsebit_next_set(s, idx); | ||
1364 | if (idx == 0) | ||
1365 | return 0; | ||
1366 | } | ||
1367 | |||
1368 | return 0; | ||
1369 | } | ||
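
Together, sparsebit_next_set_num() and sparsebit_next_clear_num() support a simple find-and-claim allocation pattern. A minimal sketch, assuming a sparsebit s in which set bits mark free pages (s, base, and npages are illustrative names, not part of this file); note that both helpers search strictly after start, so index start itself is never returned:

	/* Find npages consecutive free (set) bits after base, then claim them. */
	sparsebit_idx_t pg = sparsebit_next_set_num(s, base, npages);

	if (pg != 0) {
		sparsebit_clear_num(s, pg, npages);	/* mark the pages as used */
		assert(sparsebit_is_clear_num(s, pg, npages));
	}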
1370 | |||
1371 | /* Sets the bits in the inclusive range start through start + num - 1. */ | ||
1372 | void sparsebit_set_num(struct sparsebit *s, | ||
1373 | sparsebit_idx_t start, sparsebit_num_t num) | ||
1374 | { | ||
1375 | struct node *nodep, *next; | ||
1376 | unsigned int n1; | ||
1377 | sparsebit_idx_t idx; | ||
1378 | sparsebit_num_t n; | ||
1379 | sparsebit_idx_t middle_start, middle_end; | ||
1380 | |||
1381 | assert(num > 0); | ||
1382 | assert(start + num - 1 >= start); | ||
1383 | |||
1384 | /* | ||
1385 | * Leading - bits before first mask boundary. | ||
1386 | * | ||
1387 | * TODO(lhuemill): With some effort it may be possible to | ||
1388 | * replace the following loop with a straight-line sequence | ||
1389 | * of statements. High level sequence would be: | ||
1390 | * | ||
1391 | * 1. Use node_split() to force node that describes setting | ||
1392 | * of idx to be within the mask portion of a node. | ||
1393 | * 2. Form mask of bits to be set. | ||
1394 | * 3. Determine number of mask bits already set in the node | ||
1395 | * and store in a local variable named num_already_set. | ||
1396 | * 4. Set the appropriate mask bits within the node. | ||
1397 | * 5. Increment struct sparsebit_pvt num_set member | ||
1398 | * by the number of bits that were actually set. | ||
1399 | * Exclude from the counts bits that were already set. | ||
1400 | * 6. Before returning to the caller, use node_reduce() to | ||
1401 | * handle the multiple corner cases that this method | ||
1402 | * introduces. | ||
1403 | */ | ||
1404 | for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) | ||
1405 | bit_set(s, idx); | ||
1406 | |||
1407 | /* Middle - bits spanning one or more entire mask */ | ||
1408 | middle_start = idx; | ||
1409 | middle_end = middle_start + (n & -MASK_BITS) - 1; | ||
1410 | if (n >= MASK_BITS) { | ||
1411 | nodep = node_split(s, middle_start); | ||
1412 | |||
1413 | /* | ||
1414 | * As needed, split just after end of middle bits. | ||
1415 | * No split needed if end of middle bits is at highest | ||
1416 | * supported bit index. | ||
1417 | */ | ||
1418 | if (middle_end + 1 > middle_end) | ||
1419 | (void) node_split(s, middle_end + 1); | ||
1420 | |||
1421 | /* Delete nodes that only describe bits within the middle. */ | ||
1422 | for (next = node_next(s, nodep); | ||
1423 | next && (next->idx < middle_end); | ||
1424 | next = node_next(s, nodep)) { | ||
1425 | assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); | ||
1426 | node_rm(s, next); | ||
1427 | next = NULL; | ||
1428 | } | ||
1429 | |||
1430 | /* As needed set each of the mask bits */ | ||
1431 | for (n1 = 0; n1 < MASK_BITS; n1++) { | ||
1432 | if (!(nodep->mask & (1 << n1))) { | ||
1433 | nodep->mask |= 1 << n1; | ||
1434 | s->num_set++; | ||
1435 | } | ||
1436 | } | ||
1437 | |||
1438 | s->num_set -= nodep->num_after; | ||
1439 | nodep->num_after = middle_end - middle_start + 1 - MASK_BITS; | ||
1440 | s->num_set += nodep->num_after; | ||
1441 | |||
1442 | node_reduce(s, nodep); | ||
1443 | } | ||
1444 | idx = middle_end + 1; | ||
1445 | n -= middle_end - middle_start + 1; | ||
1446 | |||
1447 | /* Trailing - bits at and beyond last mask boundary */ | ||
1448 | assert(n < MASK_BITS); | ||
1449 | for (; n > 0; idx++, n--) | ||
1450 | bit_set(s, idx); | ||
1451 | } | ||
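
A worked example of the leading/middle/trailing decomposition above, assuming MASK_BITS is 32 (MASK_BITS is derived from sizeof(mask_t), so the exact boundaries depend on the mask type):

	sparsebit_set_num(s, 30, 70);	/* sets bits 30..99 */
	/*
	 * Leading:  bits 30..31 are set one at a time until idx % 32 == 0.
	 * Middle:   n is now 68, and 68 & -32 == 64, so bits 32..95 are
	 *           handled in bulk via a node's mask and num_after fields.
	 * Trailing: the remaining n == 4 bits, 96..99, are set one at a time.
	 */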
1452 | |||
1453 | /* Clears the bits in the inclusive range start through start + num - 1. */ | ||
1454 | void sparsebit_clear_num(struct sparsebit *s, | ||
1455 | sparsebit_idx_t start, sparsebit_num_t num) | ||
1456 | { | ||
1457 | struct node *nodep, *next; | ||
1458 | unsigned int n1; | ||
1459 | sparsebit_idx_t idx; | ||
1460 | sparsebit_num_t n; | ||
1461 | sparsebit_idx_t middle_start, middle_end; | ||
1462 | |||
1463 | assert(num > 0); | ||
1464 | assert(start + num - 1 >= start); | ||
1465 | |||
1466 | /* Leading - bits before first mask boundary */ | ||
1467 | for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) | ||
1468 | bit_clear(s, idx); | ||
1469 | |||
1470 | /* Middle - bits spanning one or more entire mask */ | ||
1471 | middle_start = idx; | ||
1472 | middle_end = middle_start + (n & -MASK_BITS) - 1; | ||
1473 | if (n >= MASK_BITS) { | ||
1474 | nodep = node_split(s, middle_start); | ||
1475 | |||
1476 | /* | ||
1477 | * As needed, split just after end of middle bits. | ||
1478 | * No split needed if end of middle bits is at highest | ||
1479 | * supported bit index. | ||
1480 | */ | ||
1481 | if (middle_end + 1 > middle_end) | ||
1482 | (void) node_split(s, middle_end + 1); | ||
1483 | |||
1484 | /* Delete nodes that only describe bits within the middle. */ | ||
1485 | for (next = node_next(s, nodep); | ||
1486 | next && (next->idx < middle_end); | ||
1487 | next = node_next(s, nodep)) { | ||
1488 | assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); | ||
1489 | node_rm(s, next); | ||
1490 | next = NULL; | ||
1491 | } | ||
1492 | |||
1493 | /* As needed clear each of the mask bits */ | ||
1494 | for (n1 = 0; n1 < MASK_BITS; n1++) { | ||
1495 | if (nodep->mask & (1 << n1)) { | ||
1496 | nodep->mask &= ~(1 << n1); | ||
1497 | s->num_set--; | ||
1498 | } | ||
1499 | } | ||
1500 | |||
1501 | /* Clear any bits described by num_after */ | ||
1502 | s->num_set -= nodep->num_after; | ||
1503 | nodep->num_after = 0; | ||
1504 | |||
1505 | /* | ||
1506 | * Delete the node that describes the beginning of | ||
1507 | * the middle bits and perform any allowed reductions | ||
1508 | * with the nodes prev or next of nodep. | ||
1509 | */ | ||
1510 | node_reduce(s, nodep); | ||
1511 | nodep = NULL; | ||
1512 | } | ||
1513 | idx = middle_end + 1; | ||
1514 | n -= middle_end - middle_start + 1; | ||
1515 | |||
1516 | /* Trailing - bits at and beyond last mask boundary */ | ||
1517 | assert(n < MASK_BITS); | ||
1518 | for (; n > 0; idx++, n--) | ||
1519 | bit_clear(s, idx); | ||
1520 | } | ||
1521 | |||
1522 | /* Sets the bit at the index given by idx. */ | ||
1523 | void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx) | ||
1524 | { | ||
1525 | sparsebit_set_num(s, idx, 1); | ||
1526 | } | ||
1527 | |||
1528 | /* Clears the bit at the index given by idx. */ | ||
1529 | void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx) | ||
1530 | { | ||
1531 | sparsebit_clear_num(s, idx, 1); | ||
1532 | } | ||
1533 | |||
1534 | /* Sets the bits in the entire addressable range of the sparsebit array. Two calls are used because a single num argument can't represent all 2^64 bits. */ | ||
1535 | void sparsebit_set_all(struct sparsebit *s) | ||
1536 | { | ||
1537 | sparsebit_set(s, 0); | ||
1538 | sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0); | ||
1539 | assert(sparsebit_all_set(s)); | ||
1540 | } | ||
1541 | |||
1542 | /* Clears the bits in the entire addressable range of the sparsebit array. */ | ||
1543 | void sparsebit_clear_all(struct sparsebit *s) | ||
1544 | { | ||
1545 | sparsebit_clear(s, 0); | ||
1546 | sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0); | ||
1547 | assert(!sparsebit_any_set(s)); | ||
1548 | } | ||
1549 | |||
1550 | static size_t display_range(FILE *stream, sparsebit_idx_t low, | ||
1551 | sparsebit_idx_t high, bool prepend_comma_space) | ||
1552 | { | ||
1553 | char *fmt_str; | ||
1554 | size_t sz; | ||
1555 | |||
1556 | /* Determine the printf format string */ | ||
1557 | if (low == high) | ||
1558 | fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx"; | ||
1559 | else | ||
1560 | fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx"; | ||
1561 | |||
1562 | /* | ||
1563 | * When stream is NULL, just determine the size of what would | ||
1564 | * have been printed, else print the range. | ||
1565 | */ | ||
1566 | if (!stream) | ||
1567 | sz = snprintf(NULL, 0, fmt_str, low, high); | ||
1568 | else | ||
1569 | sz = fprintf(stream, fmt_str, low, high); | ||
1570 | |||
1571 | return sz; | ||
1572 | } | ||
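
display_range() relies on the C99 guarantee that snprintf(NULL, 0, ...) returns the number of characters that would have been written, without writing anything. A standalone illustration of the measure-then-print idiom (the values are arbitrary):

	size_t sz = snprintf(NULL, 0, "0x%lx:0x%lx", 0x5UL, 0xeUL);
	/* sz is now 7, the length of "0x5:0xe", and nothing was printed. */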
1573 | |||
1574 | |||
1575 | /* Dumps to the FILE stream given by stream, the bit settings | ||
1576 | * of s. Each line of output is prefixed with the number of | ||
1577 | * spaces given by indent. The length of each line is implementation | ||
1578 | * dependent and does not depend on the indent amount. The following | ||
1579 | * is an example output of a sparsebit array that has bits: | ||
1580 | * | ||
1581 | * 0x5, 0x8, 0xa:0xe, 0x12 | ||
1582 | * | ||
1583 | * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18 | ||
1584 | * are set. Note that a ':', instead of a '-' is used to specify a range of | ||
1585 | * contiguous bits. This is done because '-' is used to specify command-line | ||
1586 | * options, and sometimes ranges are specified as command-line arguments. | ||
1587 | */ | ||
1588 | void sparsebit_dump(FILE *stream, struct sparsebit *s, | ||
1589 | unsigned int indent) | ||
1590 | { | ||
1591 | size_t current_line_len = 0; | ||
1592 | size_t sz; | ||
1593 | struct node *nodep; | ||
1594 | |||
1595 | if (!sparsebit_any_set(s)) | ||
1596 | return; | ||
1597 | |||
1598 | /* Display initial indent */ | ||
1599 | fprintf(stream, "%*s", indent, ""); | ||
1600 | |||
1601 | /* For each node */ | ||
1602 | for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) { | ||
1603 | unsigned int n1; | ||
1604 | sparsebit_idx_t low, high; | ||
1605 | |||
1606 | /* For each group of bits in the mask */ | ||
1607 | for (n1 = 0; n1 < MASK_BITS; n1++) { | ||
1608 | if (nodep->mask & (1 << n1)) { | ||
1609 | low = high = nodep->idx + n1; | ||
1610 | |||
1611 | for (; n1 < MASK_BITS; n1++) { | ||
1612 | if (nodep->mask & (1 << n1)) | ||
1613 | high = nodep->idx + n1; | ||
1614 | else | ||
1615 | break; | ||
1616 | } | ||
1617 | |||
1618 | if ((n1 == MASK_BITS) && nodep->num_after) | ||
1619 | high += nodep->num_after; | ||
1620 | |||
1621 | /* | ||
1622 | * How much room will it take to display | ||
1623 | * this range. | ||
1624 | */ | ||
1625 | sz = display_range(NULL, low, high, | ||
1626 | current_line_len != 0); | ||
1627 | |||
1628 | /* | ||
1629 | * If there is not enough room, display | ||
1630 | * a newline plus the indent of the next | ||
1631 | * line. | ||
1632 | */ | ||
1633 | if (current_line_len + sz > DUMP_LINE_MAX) { | ||
1634 | fputs("\n", stream); | ||
1635 | fprintf(stream, "%*s", indent, ""); | ||
1636 | current_line_len = 0; | ||
1637 | } | ||
1638 | |||
1639 | /* Display the range */ | ||
1640 | sz = display_range(stream, low, high, | ||
1641 | current_line_len != 0); | ||
1642 | current_line_len += sz; | ||
1643 | } | ||
1644 | } | ||
1645 | |||
1646 | /* | ||
1647 | * If num_after is non-zero and the most-significant bit of | ||
1648 | * mask is not set, then we still need to display a range | ||
1649 | * for the bits described by num_after. | ||
1650 | */ | ||
1651 | if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) { | ||
1652 | low = nodep->idx + MASK_BITS; | ||
1653 | high = nodep->idx + MASK_BITS + nodep->num_after - 1; | ||
1654 | |||
1655 | /* | ||
1656 | * How much room will it take to display | ||
1657 | * this range. | ||
1658 | */ | ||
1659 | sz = display_range(NULL, low, high, | ||
1660 | current_line_len != 0); | ||
1661 | |||
1662 | /* | ||
1663 | * If there is not enough room, display | ||
1664 | * a newline plus the indent of the next | ||
1665 | * line. | ||
1666 | */ | ||
1667 | if (current_line_len + sz > DUMP_LINE_MAX) { | ||
1668 | fputs("\n", stream); | ||
1669 | fprintf(stream, "%*s", indent, ""); | ||
1670 | current_line_len = 0; | ||
1671 | } | ||
1672 | |||
1673 | /* Display the range */ | ||
1674 | sz = display_range(stream, low, high, | ||
1675 | current_line_len != 0); | ||
1676 | current_line_len += sz; | ||
1677 | } | ||
1678 | } | ||
1679 | fputs("\n", stream); | ||
1680 | } | ||
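
A short usage sketch that reproduces the example output from the comment above, assuming the sparsebit_alloc()/sparsebit_free() pair declared in sparsebit.h:

	struct sparsebit *s = sparsebit_alloc();

	sparsebit_set(s, 0x5);
	sparsebit_set(s, 0x8);
	sparsebit_set_num(s, 0xa, 5);	/* bits 0xa..0xe */
	sparsebit_set(s, 0x12);
	sparsebit_dump(stdout, s, 0);	/* prints: 0x5, 0x8, 0xa:0xe, 0x12 */
	sparsebit_free(&s);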
1681 | |||
1682 | /* Validates the internal state of the sparsebit array given by | ||
1683 | * s. On error, diagnostic information is printed to stderr and | ||
1684 | * abort is called. | ||
1685 | */ | ||
1686 | void sparsebit_validate_internal(struct sparsebit *s) | ||
1687 | { | ||
1688 | bool error_detected = false; | ||
1689 | struct node *nodep, *prev = NULL; | ||
1690 | sparsebit_num_t total_bits_set = 0; | ||
1691 | unsigned int n1; | ||
1692 | |||
1693 | /* For each node */ | ||
1694 | for (nodep = node_first(s); nodep; | ||
1695 | prev = nodep, nodep = node_next(s, nodep)) { | ||
1696 | |||
1697 | /* | ||
1698 | * Increase total bits set by the number of bits set | ||
1699 | * in this node. | ||
1700 | */ | ||
1701 | for (n1 = 0; n1 < MASK_BITS; n1++) | ||
1702 | if (nodep->mask & (1 << n1)) | ||
1703 | total_bits_set++; | ||
1704 | |||
1705 | total_bits_set += nodep->num_after; | ||
1706 | |||
1707 | /* | ||
1708 | * Whether to allow a mask of 0 is an arbitrary choice. | ||
1709 | * For diagnostic purposes it is beneficial to have only | ||
1710 | * one valid means to represent a given set of bits, so | ||
1711 | * the arbitrary choice made here is to not allow | ||
1712 | * a mask of zero. | ||
1713 | */ | ||
1714 | if (nodep->mask == 0) { | ||
1715 | fprintf(stderr, "Node mask of zero, " | ||
1716 | "nodep: %p nodep->mask: 0x%x", | ||
1717 | nodep, nodep->mask); | ||
1718 | error_detected = true; | ||
1719 | break; | ||
1720 | } | ||
1721 | |||
1722 | /* | ||
1723 | * Validate num_after is not greater than the max index | ||
1724 | * - the number of mask bits. The num_after member | ||
1725 | * uses 0-based indexing and thus has no value that | ||
1726 | * represents all bits set. This limitation is handled | ||
1727 | * by requiring a non-zero mask. With a non-zero mask, | ||
1728 | * MASK_BITS worth of bits are described by the mask, | ||
1729 | * which makes the largest needed num_after equal to: | ||
1730 | * | ||
1731 | * (~(sparsebit_num_t) 0) - MASK_BITS + 1 | ||
1732 | */ | ||
1733 | if (nodep->num_after | ||
1734 | > (~(sparsebit_num_t) 0) - MASK_BITS + 1) { | ||
1735 | fprintf(stderr, "num_after too large, " | ||
1736 | "nodep: %p nodep->num_after: 0x%lx", | ||
1737 | nodep, nodep->num_after); | ||
1738 | error_detected = true; | ||
1739 | break; | ||
1740 | } | ||
1741 | |||
1742 | /* Validate node index is divisible by the mask size */ | ||
1743 | if (nodep->idx % MASK_BITS) { | ||
1744 | fprintf(stderr, "Node index not divisable by " | ||
1745 | "mask size,\n" | ||
1746 | " nodep: %p nodep->idx: 0x%lx " | ||
1747 | "MASK_BITS: %lu\n", | ||
1748 | nodep, nodep->idx, MASK_BITS); | ||
1749 | error_detected = true; | ||
1750 | break; | ||
1751 | } | ||
1752 | |||
1753 | /* | ||
1754 | * Validate bits described by node don't wrap beyond the | ||
1755 | * highest supported index. | ||
1756 | */ | ||
1757 | if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) { | ||
1758 | fprintf(stderr, "Bits described by node wrap " | ||
1759 | "beyond highest supported index,\n" | ||
1760 | " nodep: %p nodep->idx: 0x%lx\n" | ||
1761 | " MASK_BITS: %lu nodep->num_after: 0x%lx", | ||
1762 | nodep, nodep->idx, MASK_BITS, nodep->num_after); | ||
1763 | error_detected = true; | ||
1764 | break; | ||
1765 | } | ||
1766 | |||
1767 | /* Check parent pointers. */ | ||
1768 | if (nodep->left) { | ||
1769 | if (nodep->left->parent != nodep) { | ||
1770 | fprintf(stderr, "Left child parent pointer " | ||
1771 | "doesn't point to this node,\n" | ||
1772 | " nodep: %p nodep->left: %p " | ||
1773 | "nodep->left->parent: %p", | ||
1774 | nodep, nodep->left, | ||
1775 | nodep->left->parent); | ||
1776 | error_detected = true; | ||
1777 | break; | ||
1778 | } | ||
1779 | } | ||
1780 | |||
1781 | if (nodep->right) { | ||
1782 | if (nodep->right->parent != nodep) { | ||
1783 | fprintf(stderr, "Right child parent pointer " | ||
1784 | "doesn't point to this node,\n" | ||
1785 | " nodep: %p nodep->right: %p " | ||
1786 | "nodep->right->parent: %p", | ||
1787 | nodep, nodep->right, | ||
1788 | nodep->right->parent); | ||
1789 | error_detected = true; | ||
1790 | break; | ||
1791 | } | ||
1792 | } | ||
1793 | |||
1794 | if (!nodep->parent) { | ||
1795 | if (s->root != nodep) { | ||
1796 | fprintf(stderr, "Unexpected root node, " | ||
1797 | "s->root: %p nodep: %p", | ||
1798 | s->root, nodep); | ||
1799 | error_detected = true; | ||
1800 | break; | ||
1801 | } | ||
1802 | } | ||
1803 | |||
1804 | if (prev) { | ||
1805 | /* | ||
1806 | * Is index of previous node before index of | ||
1807 | * current node? | ||
1808 | */ | ||
1809 | if (prev->idx >= nodep->idx) { | ||
1810 | fprintf(stderr, "Previous node index " | ||
1811 | ">= current node index,\n" | ||
1812 | " prev: %p prev->idx: 0x%lx\n" | ||
1813 | " nodep: %p nodep->idx: 0x%lx", | ||
1814 | prev, prev->idx, nodep, nodep->idx); | ||
1815 | error_detected = true; | ||
1816 | break; | ||
1817 | } | ||
1818 | |||
1819 | /* | ||
1820 | * Nodes occur in ascending order, based on each | ||
1821 | * node's starting index. | ||
1822 | */ | ||
1823 | if ((prev->idx + MASK_BITS + prev->num_after - 1) | ||
1824 | >= nodep->idx) { | ||
1825 | fprintf(stderr, "Previous node bit range " | ||
1826 | "overlap with current node bit range,\n" | ||
1827 | " prev: %p prev->idx: 0x%lx " | ||
1828 | "prev->num_after: 0x%lx\n" | ||
1829 | " nodep: %p nodep->idx: 0x%lx " | ||
1830 | "nodep->num_after: 0x%lx\n" | ||
1831 | " MASK_BITS: %lu", | ||
1832 | prev, prev->idx, prev->num_after, | ||
1833 | nodep, nodep->idx, nodep->num_after, | ||
1834 | MASK_BITS); | ||
1835 | error_detected = true; | ||
1836 | break; | ||
1837 | } | ||
1838 | |||
1839 | /* | ||
1840 | * When the node has all mask bits set, it shouldn't | ||
1841 | * be adjacent to the last bit described by the | ||
1842 | * previous node. | ||
1843 | */ | ||
1844 | if (nodep->mask == ~(mask_t) 0 && | ||
1845 | prev->idx + MASK_BITS + prev->num_after == nodep->idx) { | ||
1846 | fprintf(stderr, "Current node has mask with " | ||
1847 | "all bits set and is adjacent to the " | ||
1848 | "previous node,\n" | ||
1849 | " prev: %p prev->idx: 0x%lx " | ||
1850 | "prev->num_after: 0x%lx\n" | ||
1851 | " nodep: %p nodep->idx: 0x%lx " | ||
1852 | "nodep->num_after: 0x%lx\n" | ||
1853 | " MASK_BITS: %lu", | ||
1854 | prev, prev->idx, prev->num_after, | ||
1855 | nodep, nodep->idx, nodep->num_after, | ||
1856 | MASK_BITS); | ||
1857 | |||
1858 | error_detected = true; | ||
1859 | break; | ||
1860 | } | ||
1861 | } | ||
1862 | } | ||
1863 | |||
1864 | if (!error_detected) { | ||
1865 | /* | ||
1866 | * Is sum of bits set in each node equal to the count | ||
1867 | * of total bits set? | ||
1868 | */ | ||
1869 | if (s->num_set != total_bits_set) { | ||
1870 | fprintf(stderr, "Number of bits set missmatch,\n" | ||
1871 | " s->num_set: 0x%lx total_bits_set: 0x%lx", | ||
1872 | s->num_set, total_bits_set); | ||
1873 | |||
1874 | error_detected = true; | ||
1875 | } | ||
1876 | } | ||
1877 | |||
1878 | if (error_detected) { | ||
1879 | fputs(" dump_internal:\n", stderr); | ||
1880 | sparsebit_dump_internal(stderr, s, 4); | ||
1881 | abort(); | ||
1882 | } | ||
1883 | } | ||
1884 | |||
1885 | |||
1886 | #ifdef FUZZ | ||
1887 | /* A simple but effective fuzzing driver. Look for bugs with the help | ||
1888 | * of some invariants and a trivial reference model of the sparsebit. | ||
1889 | * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let | ||
1890 | * afl-fuzz do the magic. :) | ||
1891 | */ | ||
1892 | |||
1893 | #include <stdlib.h> | ||
1894 | #include <assert.h> | ||
1895 | |||
1896 | struct range { | ||
1897 | sparsebit_idx_t first, last; | ||
1898 | bool set; | ||
1899 | }; | ||
1900 | |||
1901 | struct sparsebit *s; | ||
1902 | struct range ranges[1000]; | ||
1903 | int num_ranges; | ||
1904 | |||
1905 | static bool get_value(sparsebit_idx_t idx) | ||
1906 | { | ||
1907 | int i; | ||
1908 | |||
1909 | for (i = num_ranges; --i >= 0; ) | ||
1910 | if (ranges[i].first <= idx && idx <= ranges[i].last) | ||
1911 | return ranges[i].set; | ||
1912 | |||
1913 | return false; | ||
1914 | } | ||
1915 | |||
1916 | static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last) | ||
1917 | { | ||
1918 | sparsebit_num_t num; | ||
1919 | sparsebit_idx_t next; | ||
1920 | |||
1921 | if (first < last) { | ||
1922 | num = last - first + 1; | ||
1923 | } else { | ||
1924 | num = first - last + 1; | ||
1925 | first = last; | ||
1926 | last = first + num - 1; | ||
1927 | } | ||
1928 | |||
1929 | switch (code) { | ||
1930 | case 0: | ||
1931 | sparsebit_set(s, first); | ||
1932 | assert(sparsebit_is_set(s, first)); | ||
1933 | assert(!sparsebit_is_clear(s, first)); | ||
1934 | assert(sparsebit_any_set(s)); | ||
1935 | assert(!sparsebit_all_clear(s)); | ||
1936 | if (get_value(first)) | ||
1937 | return; | ||
1938 | if (num_ranges == 1000) | ||
1939 | exit(0); | ||
1940 | ranges[num_ranges++] = (struct range) | ||
1941 | { .first = first, .last = first, .set = true }; | ||
1942 | break; | ||
1943 | case 1: | ||
1944 | sparsebit_clear(s, first); | ||
1945 | assert(!sparsebit_is_set(s, first)); | ||
1946 | assert(sparsebit_is_clear(s, first)); | ||
1947 | assert(sparsebit_any_clear(s)); | ||
1948 | assert(!sparsebit_all_set(s)); | ||
1949 | if (!get_value(first)) | ||
1950 | return; | ||
1951 | if (num_ranges == 1000) | ||
1952 | exit(0); | ||
1953 | ranges[num_ranges++] = (struct range) | ||
1954 | { .first = first, .last = first, .set = false }; | ||
1955 | break; | ||
1956 | case 2: | ||
1957 | assert(sparsebit_is_set(s, first) == get_value(first)); | ||
1958 | assert(sparsebit_is_clear(s, first) == !get_value(first)); | ||
1959 | break; | ||
1960 | case 3: | ||
1961 | if (sparsebit_any_set(s)) | ||
1962 | assert(get_value(sparsebit_first_set(s))); | ||
1963 | if (sparsebit_any_clear(s)) | ||
1964 | assert(!get_value(sparsebit_first_clear(s))); | ||
1965 | sparsebit_set_all(s); | ||
1966 | assert(!sparsebit_any_clear(s)); | ||
1967 | assert(sparsebit_all_set(s)); | ||
1968 | num_ranges = 0; | ||
1969 | ranges[num_ranges++] = (struct range) | ||
1970 | { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true }; | ||
1971 | break; | ||
1972 | case 4: | ||
1973 | if (sparsebit_any_set(s)) | ||
1974 | assert(get_value(sparsebit_first_set(s))); | ||
1975 | if (sparsebit_any_clear(s)) | ||
1976 | assert(!get_value(sparsebit_first_clear(s))); | ||
1977 | sparsebit_clear_all(s); | ||
1978 | assert(!sparsebit_any_set(s)); | ||
1979 | assert(sparsebit_all_clear(s)); | ||
1980 | num_ranges = 0; | ||
1981 | break; | ||
1982 | case 5: | ||
1983 | next = sparsebit_next_set(s, first); | ||
1984 | assert(next == 0 || next > first); | ||
1985 | assert(next == 0 || get_value(next)); | ||
1986 | break; | ||
1987 | case 6: | ||
1988 | next = sparsebit_next_clear(s, first); | ||
1989 | assert(next == 0 || next > first); | ||
1990 | assert(next == 0 || !get_value(next)); | ||
1991 | break; | ||
1992 | case 7: | ||
1993 | next = sparsebit_next_clear(s, first); | ||
1994 | if (sparsebit_is_set_num(s, first, num)) { | ||
1995 | assert(next == 0 || next > last); | ||
1996 | if (first) | ||
1997 | next = sparsebit_next_set(s, first - 1); | ||
1998 | else if (sparsebit_any_set(s)) | ||
1999 | next = sparsebit_first_set(s); | ||
2000 | else | ||
2001 | return; | ||
2002 | assert(next == first); | ||
2003 | } else { | ||
2004 | assert(sparsebit_is_clear(s, first) || next <= last); | ||
2005 | } | ||
2006 | break; | ||
2007 | case 8: | ||
2008 | next = sparsebit_next_set(s, first); | ||
2009 | if (sparsebit_is_clear_num(s, first, num)) { | ||
2010 | assert(next == 0 || next > last); | ||
2011 | if (first) | ||
2012 | next = sparsebit_next_clear(s, first - 1); | ||
2013 | else if (sparsebit_any_clear(s)) | ||
2014 | next = sparsebit_first_clear(s); | ||
2015 | else | ||
2016 | return; | ||
2017 | assert(next == first); | ||
2018 | } else { | ||
2019 | assert(sparsebit_is_set(s, first) || next <= last); | ||
2020 | } | ||
2021 | break; | ||
2022 | case 9: | ||
2023 | sparsebit_set_num(s, first, num); | ||
2024 | assert(sparsebit_is_set_num(s, first, num)); | ||
2025 | assert(!sparsebit_is_clear_num(s, first, num)); | ||
2026 | assert(sparsebit_any_set(s)); | ||
2027 | assert(!sparsebit_all_clear(s)); | ||
2028 | if (num_ranges == 1000) | ||
2029 | exit(0); | ||
2030 | ranges[num_ranges++] = (struct range) | ||
2031 | { .first = first, .last = last, .set = true }; | ||
2032 | break; | ||
2033 | case 10: | ||
2034 | sparsebit_clear_num(s, first, num); | ||
2035 | assert(!sparsebit_is_set_num(s, first, num)); | ||
2036 | assert(sparsebit_is_clear_num(s, first, num)); | ||
2037 | assert(sparsebit_any_clear(s)); | ||
2038 | assert(!sparsebit_all_set(s)); | ||
2039 | if (num_ranges == 1000) | ||
2040 | exit(0); | ||
2041 | ranges[num_ranges++] = (struct range) | ||
2042 | { .first = first, .last = last, .set = false }; | ||
2043 | break; | ||
2044 | case 11: | ||
2045 | sparsebit_validate_internal(s); | ||
2046 | break; | ||
2047 | default: | ||
2048 | break; | ||
2049 | } | ||
2050 | } | ||
2051 | |||
2052 | unsigned char get8(void) | ||
2053 | { | ||
2054 | int ch; | ||
2055 | |||
2056 | ch = getchar(); | ||
2057 | if (ch == EOF) | ||
2058 | exit(0); | ||
2059 | return ch; | ||
2060 | } | ||
2061 | |||
2062 | uint64_t get64(void) | ||
2063 | { | ||
2064 | uint64_t x; | ||
2065 | |||
2066 | x = get8(); | ||
2067 | x = (x << 8) | get8(); | ||
2068 | x = (x << 8) | get8(); | ||
2069 | x = (x << 8) | get8(); | ||
2070 | x = (x << 8) | get8(); | ||
2071 | x = (x << 8) | get8(); | ||
2072 | x = (x << 8) | get8(); | ||
2073 | return (x << 8) | get8(); | ||
2074 | } | ||
2075 | |||
2076 | int main(void) | ||
2077 | { | ||
2078 | s = sparsebit_alloc(); | ||
2079 | for (;;) { | ||
2080 | uint8_t op = get8() & 0xf; | ||
2081 | uint64_t first = get64(); | ||
2082 | uint64_t last = get64(); | ||
2083 | |||
2084 | operate(op, first, last); | ||
2085 | } | ||
2086 | } | ||
2087 | #endif | ||
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c new file mode 100644 index 000000000000..12df46280b23 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86.c | |||
@@ -0,0 +1,697 @@ | |||
1 | /* | ||
2 | * tools/testing/selftests/kvm/lib/x86.c | ||
3 | * | ||
4 | * Copyright (C) 2018, Google LLC. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
7 | */ | ||
8 | |||
9 | #define _GNU_SOURCE /* for program_invocation_name */ | ||
10 | |||
11 | #include "test_util.h" | ||
12 | #include "kvm_util.h" | ||
13 | #include "kvm_util_internal.h" | ||
14 | #include "x86.h" | ||
15 | |||
16 | /* Minimum physical address used for virtual translation tables. */ | ||
17 | #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 | ||
18 | |||
19 | /* Virtual translation table structure declarations */ | ||
20 | struct pageMapL4Entry { | ||
21 | uint64_t present:1; | ||
22 | uint64_t writable:1; | ||
23 | uint64_t user:1; | ||
24 | uint64_t write_through:1; | ||
25 | uint64_t cache_disable:1; | ||
26 | uint64_t accessed:1; | ||
27 | uint64_t ignored_06:1; | ||
28 | uint64_t page_size:1; | ||
29 | uint64_t ignored_11_08:4; | ||
30 | uint64_t address:40; | ||
31 | uint64_t ignored_62_52:11; | ||
32 | uint64_t execute_disable:1; | ||
33 | }; | ||
34 | |||
35 | struct pageDirectoryPointerEntry { | ||
36 | uint64_t present:1; | ||
37 | uint64_t writable:1; | ||
38 | uint64_t user:1; | ||
39 | uint64_t write_through:1; | ||
40 | uint64_t cache_disable:1; | ||
41 | uint64_t accessed:1; | ||
42 | uint64_t ignored_06:1; | ||
43 | uint64_t page_size:1; | ||
44 | uint64_t ignored_11_08:4; | ||
45 | uint64_t address:40; | ||
46 | uint64_t ignored_62_52:11; | ||
47 | uint64_t execute_disable:1; | ||
48 | }; | ||
49 | |||
50 | struct pageDirectoryEntry { | ||
51 | uint64_t present:1; | ||
52 | uint64_t writable:1; | ||
53 | uint64_t user:1; | ||
54 | uint64_t write_through:1; | ||
55 | uint64_t cache_disable:1; | ||
56 | uint64_t accessed:1; | ||
57 | uint64_t ignored_06:1; | ||
58 | uint64_t page_size:1; | ||
59 | uint64_t ignored_11_08:4; | ||
60 | uint64_t address:40; | ||
61 | uint64_t ignored_62_52:11; | ||
62 | uint64_t execute_disable:1; | ||
63 | }; | ||
64 | |||
65 | struct pageTableEntry { | ||
66 | uint64_t present:1; | ||
67 | uint64_t writable:1; | ||
68 | uint64_t user:1; | ||
69 | uint64_t write_through:1; | ||
70 | uint64_t cache_disable:1; | ||
71 | uint64_t accessed:1; | ||
72 | uint64_t dirty:1; | ||
73 | uint64_t reserved_07:1; | ||
74 | uint64_t global:1; | ||
75 | uint64_t ignored_11_09:3; | ||
76 | uint64_t address:40; | ||
77 | uint64_t ignored_62_52:11; | ||
78 | uint64_t execute_disable:1; | ||
79 | }; | ||
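
These four structures describe the levels of the IA-32e 4-level page walk used throughout this file. As a sketch of how virt_pg_map() and addr_gva2gpa() below index into them, a 48-bit guest virtual address splits into four 9-bit table indexes plus a 12-bit page offset (the address value here is a hypothetical example):

	uint64_t vaddr = 0x7fff12345678ULL;		/* hypothetical */
	uint16_t pte_index  = (vaddr >> 12) & 0x1ffu;	/* page table        */
	uint16_t pde_index  = (vaddr >> 21) & 0x1ffu;	/* page directory    */
	uint16_t pdpe_index = (vaddr >> 30) & 0x1ffu;	/* page dir pointer  */
	uint16_t pml4_index = (vaddr >> 39) & 0x1ffu;	/* page map level 4  */
	uint64_t offset     = vaddr & 0xfffu;		/* byte within page  */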
80 | |||
81 | /* Register Dump | ||
82 | * | ||
83 | * Input Args: | ||
84 | * indent - Left margin indent amount | ||
85 | * regs - register state | ||
86 | * | ||
87 | * Output Args: | ||
88 | * stream - Output FILE stream | ||
89 | * | ||
90 | * Return: None | ||
91 | * | ||
92 | * Dumps the state of the registers given by regs, to the FILE stream | ||
93 | * given by stream. | ||
94 | */ | ||
95 | void regs_dump(FILE *stream, struct kvm_regs *regs, | ||
96 | uint8_t indent) | ||
97 | { | ||
98 | fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " | ||
99 | "rcx: 0x%.16llx rdx: 0x%.16llx\n", | ||
100 | indent, "", | ||
101 | regs->rax, regs->rbx, regs->rcx, regs->rdx); | ||
102 | fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " | ||
103 | "rsp: 0x%.16llx rbp: 0x%.16llx\n", | ||
104 | indent, "", | ||
105 | regs->rsi, regs->rdi, regs->rsp, regs->rbp); | ||
106 | fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " | ||
107 | "r10: 0x%.16llx r11: 0x%.16llx\n", | ||
108 | indent, "", | ||
109 | regs->r8, regs->r9, regs->r10, regs->r11); | ||
110 | fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " | ||
111 | "r14: 0x%.16llx r15: 0x%.16llx\n", | ||
112 | indent, "", | ||
113 | regs->r12, regs->r13, regs->r14, regs->r15); | ||
114 | fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", | ||
115 | indent, "", | ||
116 | regs->rip, regs->rflags); | ||
117 | } | ||
118 | |||
119 | /* Segment Dump | ||
120 | * | ||
121 | * Input Args: | ||
122 | * indent - Left margin indent amount | ||
123 | * segment - KVM segment | ||
124 | * | ||
125 | * Output Args: | ||
126 | * stream - Output FILE stream | ||
127 | * | ||
128 | * Return: None | ||
129 | * | ||
130 | * Dumps the state of the KVM segment given by segment, to the FILE stream | ||
131 | * given by stream. | ||
132 | */ | ||
133 | static void segment_dump(FILE *stream, struct kvm_segment *segment, | ||
134 | uint8_t indent) | ||
135 | { | ||
136 | fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " | ||
137 | "selector: 0x%.4x type: 0x%.2x\n", | ||
138 | indent, "", segment->base, segment->limit, | ||
139 | segment->selector, segment->type); | ||
140 | fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " | ||
141 | "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", | ||
142 | indent, "", segment->present, segment->dpl, | ||
143 | segment->db, segment->s, segment->l); | ||
144 | fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " | ||
145 | "unusable: 0x%.2x padding: 0x%.2x\n", | ||
146 | indent, "", segment->g, segment->avl, | ||
147 | segment->unusable, segment->padding); | ||
148 | } | ||
149 | |||
150 | /* dtable Dump | ||
151 | * | ||
152 | * Input Args: | ||
153 | * indent - Left margin indent amount | ||
154 | * dtable - KVM dtable | ||
155 | * | ||
156 | * Output Args: | ||
157 | * stream - Output FILE stream | ||
158 | * | ||
159 | * Return: None | ||
160 | * | ||
161 | * Dumps the state of the KVM dtable given by dtable, to the FILE stream | ||
162 | * given by stream. | ||
163 | */ | ||
164 | static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, | ||
165 | uint8_t indent) | ||
166 | { | ||
167 | fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " | ||
168 | "padding: 0x%.4x 0x%.4x 0x%.4x\n", | ||
169 | indent, "", dtable->base, dtable->limit, | ||
170 | dtable->padding[0], dtable->padding[1], dtable->padding[2]); | ||
171 | } | ||
172 | |||
173 | /* System Register Dump | ||
174 | * | ||
175 | * Input Args: | ||
176 | * indent - Left margin indent amount | ||
177 | * sregs - System registers | ||
178 | * | ||
179 | * Output Args: | ||
180 | * stream - Output FILE stream | ||
181 | * | ||
182 | * Return: None | ||
183 | * | ||
184 | * Dumps the state of the system registers given by sregs, to the FILE stream | ||
185 | * given by stream. | ||
186 | */ | ||
187 | void sregs_dump(FILE *stream, struct kvm_sregs *sregs, | ||
188 | uint8_t indent) | ||
189 | { | ||
190 | unsigned int i; | ||
191 | |||
192 | fprintf(stream, "%*scs:\n", indent, ""); | ||
193 | segment_dump(stream, &sregs->cs, indent + 2); | ||
194 | fprintf(stream, "%*sds:\n", indent, ""); | ||
195 | segment_dump(stream, &sregs->ds, indent + 2); | ||
196 | fprintf(stream, "%*ses:\n", indent, ""); | ||
197 | segment_dump(stream, &sregs->es, indent + 2); | ||
198 | fprintf(stream, "%*sfs:\n", indent, ""); | ||
199 | segment_dump(stream, &sregs->fs, indent + 2); | ||
200 | fprintf(stream, "%*sgs:\n", indent, ""); | ||
201 | segment_dump(stream, &sregs->gs, indent + 2); | ||
202 | fprintf(stream, "%*sss:\n", indent, ""); | ||
203 | segment_dump(stream, &sregs->ss, indent + 2); | ||
204 | fprintf(stream, "%*str:\n", indent, ""); | ||
205 | segment_dump(stream, &sregs->tr, indent + 2); | ||
206 | fprintf(stream, "%*sldt:\n", indent, ""); | ||
207 | segment_dump(stream, &sregs->ldt, indent + 2); | ||
208 | |||
209 | fprintf(stream, "%*sgdt:\n", indent, ""); | ||
210 | dtable_dump(stream, &sregs->gdt, indent + 2); | ||
211 | fprintf(stream, "%*sidt:\n", indent, ""); | ||
212 | dtable_dump(stream, &sregs->idt, indent + 2); | ||
213 | |||
214 | fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " | ||
215 | "cr3: 0x%.16llx cr4: 0x%.16llx\n", | ||
216 | indent, "", | ||
217 | sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); | ||
218 | fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " | ||
219 | "apic_base: 0x%.16llx\n", | ||
220 | indent, "", | ||
221 | sregs->cr8, sregs->efer, sregs->apic_base); | ||
222 | |||
223 | fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); | ||
224 | for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { | ||
225 | fprintf(stream, "%*s%.16llx\n", indent + 2, "", | ||
226 | sregs->interrupt_bitmap[i]); | ||
227 | } | ||
228 | } | ||
229 | |||
230 | void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) | ||
231 | { | ||
232 | int rc; | ||
233 | |||
234 | TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " | ||
235 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | ||
236 | |||
237 | /* If needed, create page map l4 table. */ | ||
238 | if (!vm->pgd_created) { | ||
239 | vm_paddr_t paddr = vm_phy_page_alloc(vm, | ||
240 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); | ||
241 | vm->pgd = paddr; | ||
242 | |||
243 | /* Set pointer to pgd tables in all the VCPUs that | ||
244 | * have already been created. Future VCPUs will have | ||
245 | * the value set as each one is created. | ||
246 | */ | ||
247 | for (struct vcpu *vcpu = vm->vcpu_head; vcpu; | ||
248 | vcpu = vcpu->next) { | ||
249 | struct kvm_sregs sregs; | ||
250 | |||
251 | /* Obtain the current system register settings */ | ||
252 | vcpu_sregs_get(vm, vcpu->id, &sregs); | ||
253 | |||
254 | /* Set and store the pointer to the start of the | ||
255 | * pgd tables. | ||
256 | */ | ||
257 | sregs.cr3 = vm->pgd; | ||
258 | vcpu_sregs_set(vm, vcpu->id, &sregs); | ||
259 | } | ||
260 | |||
261 | vm->pgd_created = true; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | /* VM Virtual Page Map | ||
266 | * | ||
267 | * Input Args: | ||
268 | * vm - Virtual Machine | ||
269 | * vaddr - VM Virtual Address | ||
270 | * paddr - VM Physical Address | ||
271 | * pgd_memslot - Memory region slot for new virtual translation tables | ||
272 | * | ||
273 | * Output Args: None | ||
274 | * | ||
275 | * Return: None | ||
276 | * | ||
277 | * Within the VM given by vm, creates a virtual translation for the page | ||
278 | * starting at vaddr to the page starting at paddr. | ||
279 | */ | ||
280 | void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, | ||
281 | uint32_t pgd_memslot) | ||
282 | { | ||
283 | uint16_t index[4]; | ||
284 | struct pageMapL4Entry *pml4e; | ||
285 | |||
286 | TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " | ||
287 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | ||
288 | |||
289 | TEST_ASSERT((vaddr % vm->page_size) == 0, | ||
290 | "Virtual address not on page boundary,\n" | ||
291 | " vaddr: 0x%lx vm->page_size: 0x%x", | ||
292 | vaddr, vm->page_size); | ||
293 | TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, | ||
294 | (vaddr >> vm->page_shift)), | ||
295 | "Invalid virtual address, vaddr: 0x%lx", | ||
296 | vaddr); | ||
297 | TEST_ASSERT((paddr % vm->page_size) == 0, | ||
298 | "Physical address not on page boundary,\n" | ||
299 | " paddr: 0x%lx vm->page_size: 0x%x", | ||
300 | paddr, vm->page_size); | ||
301 | TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, | ||
302 | "Physical address beyond beyond maximum supported,\n" | ||
303 | " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", | ||
304 | paddr, vm->max_gfn, vm->page_size); | ||
305 | |||
306 | index[0] = (vaddr >> 12) & 0x1ffu; | ||
307 | index[1] = (vaddr >> 21) & 0x1ffu; | ||
308 | index[2] = (vaddr >> 30) & 0x1ffu; | ||
309 | index[3] = (vaddr >> 39) & 0x1ffu; | ||
310 | |||
311 | /* Allocate page directory pointer table if not present. */ | ||
312 | pml4e = addr_gpa2hva(vm, vm->pgd); | ||
313 | if (!pml4e[index[3]].present) { | ||
314 | pml4e[index[3]].address = vm_phy_page_alloc(vm, | ||
315 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) | ||
316 | >> vm->page_shift; | ||
317 | pml4e[index[3]].writable = true; | ||
318 | pml4e[index[3]].present = true; | ||
319 | } | ||
320 | |||
321 | /* Allocate page directory table if not present. */ | ||
322 | struct pageDirectoryPointerEntry *pdpe; | ||
323 | pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); | ||
324 | if (!pdpe[index[2]].present) { | ||
325 | pdpe[index[2]].address = vm_phy_page_alloc(vm, | ||
326 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) | ||
327 | >> vm->page_shift; | ||
328 | pdpe[index[2]].writable = true; | ||
329 | pdpe[index[2]].present = true; | ||
330 | } | ||
331 | |||
332 | /* Allocate page table if not present. */ | ||
333 | struct pageDirectoryEntry *pde; | ||
334 | pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); | ||
335 | if (!pde[index[1]].present) { | ||
336 | pde[index[1]].address = vm_phy_page_alloc(vm, | ||
337 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) | ||
338 | >> vm->page_shift; | ||
339 | pde[index[1]].writable = true; | ||
340 | pde[index[1]].present = true; | ||
341 | } | ||
342 | |||
343 | /* Fill in page table entry. */ | ||
344 | struct pageTableEntry *pte; | ||
345 | pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); | ||
346 | pte[index[0]].address = paddr >> vm->page_shift; | ||
347 | pte[index[0]].writable = true; | ||
348 | pte[index[0]].present = 1; | ||
349 | } | ||
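
A hypothetical usage sketch, assuming vm was created in VM_MODE_FLAT48PG with 4 KiB pages and that guest physical page 0x200000 is available in memslot 0 (all values here are illustrative):

	/* Map guest virtual page 0x400000 to guest physical page 0x200000. */
	virt_pgd_alloc(vm, 0);
	virt_pg_map(vm, 0x400000, 0x200000, 0);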
350 | |||
351 | /* Virtual Translation Tables Dump | ||
352 | * | ||
353 | * Input Args: | ||
354 | * vm - Virtual Machine | ||
355 | * indent - Left margin indent amount | ||
356 | * | ||
357 | * Output Args: | ||
358 | * stream - Output FILE stream | ||
359 | * | ||
360 | * Return: None | ||
361 | * | ||
362 | * Dumps to the FILE stream given by stream, the contents of all the | ||
363 | * virtual translation tables for the VM given by vm. | ||
364 | */ | ||
365 | void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) | ||
366 | { | ||
367 | struct pageMapL4Entry *pml4e, *pml4e_start; | ||
368 | struct pageDirectoryPointerEntry *pdpe, *pdpe_start; | ||
369 | struct pageDirectoryEntry *pde, *pde_start; | ||
370 | struct pageTableEntry *pte, *pte_start; | ||
371 | |||
372 | if (!vm->pgd_created) | ||
373 | return; | ||
374 | |||
375 | fprintf(stream, "%*s " | ||
376 | " no\n", indent, ""); | ||
377 | fprintf(stream, "%*s index hvaddr gpaddr " | ||
378 | "addr w exec dirty\n", | ||
379 | indent, ""); | ||
380 | pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, | ||
381 | vm->pgd); | ||
382 | for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { | ||
383 | pml4e = &pml4e_start[n1]; | ||
384 | if (!pml4e->present) | ||
385 | continue; | ||
386 | fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " | ||
387 | " %u\n", | ||
388 | indent, "", | ||
389 | pml4e - pml4e_start, pml4e, | ||
390 | addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, | ||
391 | pml4e->writable, pml4e->execute_disable); | ||
392 | |||
393 | pdpe_start = addr_gpa2hva(vm, pml4e->address | ||
394 | * vm->page_size); | ||
395 | for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { | ||
396 | pdpe = &pdpe_start[n2]; | ||
397 | if (!pdpe->present) | ||
398 | continue; | ||
399 | fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " | ||
400 | "%u %u\n", | ||
401 | indent, "", | ||
402 | pdpe - pdpe_start, pdpe, | ||
403 | addr_hva2gpa(vm, pdpe), | ||
404 | (uint64_t) pdpe->address, pdpe->writable, | ||
405 | pdpe->execute_disable); | ||
406 | |||
407 | pde_start = addr_gpa2hva(vm, | ||
408 | pdpe->address * vm->page_size); | ||
409 | for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { | ||
410 | pde = &pde_start[n3]; | ||
411 | if (!pde->present) | ||
412 | continue; | ||
413 | fprintf(stream, "%*spde 0x%-3zx %p " | ||
414 | "0x%-12lx 0x%-10lx %u %u\n", | ||
415 | indent, "", pde - pde_start, pde, | ||
416 | addr_hva2gpa(vm, pde), | ||
417 | (uint64_t) pde->address, pde->writable, | ||
418 | pde->execute_disable); | ||
419 | |||
420 | pte_start = addr_gpa2hva(vm, | ||
421 | pde->address * vm->page_size); | ||
422 | for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { | ||
423 | pte = &pte_start[n4]; | ||
424 | if (!pte->present) | ||
425 | continue; | ||
426 | fprintf(stream, "%*spte 0x%-3zx %p " | ||
427 | "0x%-12lx 0x%-10lx %u %u " | ||
428 | " %u 0x%-10lx\n", | ||
429 | indent, "", | ||
430 | pte - pte_start, pte, | ||
431 | addr_hva2gpa(vm, pte), | ||
432 | (uint64_t) pte->address, | ||
433 | pte->writable, | ||
434 | pte->execute_disable, | ||
435 | pte->dirty, | ||
436 | ((uint64_t) n1 << 27) | ||
437 | | ((uint64_t) n2 << 18) | ||
438 | | ((uint64_t) n3 << 9) | ||
439 | | ((uint64_t) n4)); | ||
440 | } | ||
441 | } | ||
442 | } | ||
443 | } | ||
444 | } | ||
445 | |||
446 | /* Set Unusable Segment | ||
447 | * | ||
448 | * Input Args: None | ||
449 | * | ||
450 | * Output Args: | ||
451 | * segp - Pointer to segment register | ||
452 | * | ||
453 | * Return: None | ||
454 | * | ||
455 | * Sets the segment register pointed to by segp to an unusable state. | ||
456 | */ | ||
457 | static void kvm_seg_set_unusable(struct kvm_segment *segp) | ||
458 | { | ||
459 | memset(segp, 0, sizeof(*segp)); | ||
460 | segp->unusable = true; | ||
461 | } | ||
462 | |||
463 | /* Set Long Mode Flat Kernel Code Segment | ||
464 | * | ||
465 | * Input Args: | ||
466 | * selector - selector value | ||
467 | * | ||
468 | * Output Args: | ||
469 | * segp - Pointer to KVM segment | ||
470 | * | ||
471 | * Return: None | ||
472 | * | ||
473 | * Sets up the KVM segment pointed to by segp, to be a code segment | ||
474 | * with the selector value given by selector. | ||
475 | */ | ||
476 | static void kvm_seg_set_kernel_code_64bit(uint16_t selector, | ||
477 | struct kvm_segment *segp) | ||
478 | { | ||
479 | memset(segp, 0, sizeof(*segp)); | ||
480 | segp->selector = selector; | ||
481 | segp->limit = 0xFFFFFFFFu; | ||
482 | segp->s = 0x1; /* kTypeCodeData */ | ||
483 | segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed | ||
484 | * | kFlagCodeReadable | ||
485 | */ | ||
486 | segp->g = true; | ||
487 | segp->l = true; | ||
488 | segp->present = 1; | ||
489 | } | ||
490 | |||
491 | /* Set Long Mode Flat Kernel Data Segment | ||
492 | * | ||
493 | * Input Args: | ||
494 | * selector - selector value | ||
495 | * | ||
496 | * Output Args: | ||
497 | * segp - Pointer to KVM segment | ||
498 | * | ||
499 | * Return: None | ||
500 | * | ||
501 | * Sets up the KVM segment pointed to by segp, to be a data segment | ||
502 | * with the selector value given by selector. | ||
503 | */ | ||
504 | static void kvm_seg_set_kernel_data_64bit(uint16_t selector, | ||
505 | struct kvm_segment *segp) | ||
506 | { | ||
507 | memset(segp, 0, sizeof(*segp)); | ||
508 | segp->selector = selector; | ||
509 | segp->limit = 0xFFFFFFFFu; | ||
510 | segp->s = 0x1; /* kTypeCodeData */ | ||
511 | segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed | ||
512 | * | kFlagDataWritable | ||
513 | */ | ||
514 | segp->g = true; | ||
515 | segp->present = true; | ||
516 | } | ||
517 | |||
518 | /* Address Guest Virtual to Guest Physical | ||
519 | * | ||
520 | * Input Args: | ||
521 | * vm - Virtual Machine | ||
522 | * gva - VM virtual address | ||
523 | * | ||
524 | * Output Args: None | ||
525 | * | ||
526 | * Return: | ||
527 | * Equivalent VM physical address | ||
528 | * | ||
529 | * Translates the VM virtual address given by gva to the equivalent | ||
530 | * VM physical address, by walking the virtual translation tables | ||
531 | * of the VM given by vm. The returned value is a guest physical | ||
532 | * address, not a host virtual address. A TEST_ASSERT failure | ||
533 | * occurs if no mapping exists for the given VM virtual | ||
534 | * address. | ||
535 | */ | ||
536 | vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) | ||
537 | { | ||
538 | uint16_t index[4]; | ||
539 | struct pageMapL4Entry *pml4e; | ||
540 | struct pageDirectoryPointerEntry *pdpe; | ||
541 | struct pageDirectoryEntry *pde; | ||
542 | struct pageTableEntry *pte; | ||
543 | void *hva; | ||
544 | |||
545 | TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " | ||
546 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | ||
547 | |||
548 | index[0] = (gva >> 12) & 0x1ffu; | ||
549 | index[1] = (gva >> 21) & 0x1ffu; | ||
550 | index[2] = (gva >> 30) & 0x1ffu; | ||
551 | index[3] = (gva >> 39) & 0x1ffu; | ||
552 | |||
553 | if (!vm->pgd_created) | ||
554 | goto unmapped_gva; | ||
555 | pml4e = addr_gpa2hva(vm, vm->pgd); | ||
556 | if (!pml4e[index[3]].present) | ||
557 | goto unmapped_gva; | ||
558 | |||
559 | pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); | ||
560 | if (!pdpe[index[2]].present) | ||
561 | goto unmapped_gva; | ||
562 | |||
563 | pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); | ||
564 | if (!pde[index[1]].present) | ||
565 | goto unmapped_gva; | ||
566 | |||
567 | pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); | ||
568 | if (!pte[index[0]].present) | ||
569 | goto unmapped_gva; | ||
570 | |||
571 | return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); | ||
572 | |||
573 | unmapped_gva: | ||
574 | TEST_ASSERT(false, "No mapping for vm virtual address, " | ||
575 | "gva: 0x%lx", gva); | ||
576 | } | ||
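
Continuing the hypothetical mapping sketched after virt_pg_map() above, the walk can be exercised for a round trip; the byte offset within the page is preserved:

	vm_paddr_t gpa = addr_gva2gpa(vm, 0x400000 + 0x123);
	TEST_ASSERT(gpa == 0x200000 + 0x123,
		    "Unexpected translation, gpa: 0x%lx", gpa);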
577 | |||
578 | void vcpu_setup(struct kvm_vm *vm, int vcpuid) | ||
579 | { | ||
580 | struct kvm_sregs sregs; | ||
581 | |||
582 | /* Set mode specific system register values. */ | ||
583 | vcpu_sregs_get(vm, vcpuid, &sregs); | ||
584 | |||
585 | switch (vm->mode) { | ||
586 | case VM_MODE_FLAT48PG: | ||
587 | sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; | ||
588 | sregs.cr4 |= X86_CR4_PAE; | ||
589 | sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); | ||
590 | |||
591 | kvm_seg_set_unusable(&sregs.ldt); | ||
592 | kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs); | ||
593 | kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds); | ||
594 | kvm_seg_set_kernel_data_64bit(0x10, &sregs.es); | ||
595 | break; | ||
596 | |||
597 | default: | ||
598 | TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); | ||
599 | } | ||
600 | vcpu_sregs_set(vm, vcpuid, &sregs); | ||
601 | |||
602 | /* If the virtual translation tables have been set up, set the system | ||
603 | * register to point to them. It's okay if they haven't been set up | ||
604 | * yet: the code that sets up the virtual translation tables will go | ||
605 | * back through any VCPUs that have already been created and set | ||
606 | * their values. | ||
607 | */ | ||
608 | if (vm->pgd_created) { | ||
609 | struct kvm_sregs sregs; | ||
610 | |||
611 | vcpu_sregs_get(vm, vcpuid, &sregs); | ||
612 | |||
613 | sregs.cr3 = vm->pgd; | ||
614 | vcpu_sregs_set(vm, vcpuid, &sregs); | ||
615 | } | ||
616 | } | ||
617 | /* Adds a vCPU with reasonable defaults (i.e., a stack) | ||
618 | * | ||
619 | * Input Args: | ||
620 | * vcpuid - The id of the VCPU to add to the VM. | ||
621 | * guest_code - The vCPU's entry point | ||
622 | */ | ||
623 | void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) | ||
624 | { | ||
625 | struct kvm_mp_state mp_state; | ||
626 | struct kvm_regs regs; | ||
627 | vm_vaddr_t stack_vaddr; | ||
628 | stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), | ||
629 | DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); | ||
630 | |||
631 | /* Create VCPU */ | ||
632 | vm_vcpu_add(vm, vcpuid); | ||
633 | |||
634 | /* Setup guest general purpose registers */ | ||
635 | vcpu_regs_get(vm, vcpuid, ®s); | ||
636 | regs.rflags = regs.rflags | 0x2; | ||
637 | regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); | ||
638 | regs.rip = (unsigned long) guest_code; | ||
639 | vcpu_regs_set(vm, vcpuid, ®s); | ||
640 | |||
641 | /* Setup the MP state */ | ||
642 | mp_state.mp_state = 0; | ||
643 | vcpu_set_mp_state(vm, vcpuid, &mp_state); | ||
644 | } | ||
645 | |||
646 | /* VM VCPU CPUID Set | ||
647 | * | ||
648 | * Input Args: | ||
649 | * vm - Virtual Machine | ||
650 | * vcpuid - VCPU id | ||
651 | * cpuid - The CPUID values to set. | ||
652 | * | ||
653 | * Output Args: None | ||
654 | * | ||
655 | * Return: void | ||
656 | * | ||
657 | * Set the VCPU's CPUID. | ||
658 | */ | ||
659 | void vcpu_set_cpuid(struct kvm_vm *vm, | ||
660 | uint32_t vcpuid, struct kvm_cpuid2 *cpuid) | ||
661 | { | ||
662 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
663 | int rc; | ||
664 | |||
665 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
666 | |||
667 | rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); | ||
668 | TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", | ||
669 | rc, errno); | ||
670 | |||
671 | } | ||
672 | /* Create a VM with reasonable defaults | ||
673 | * | ||
674 | * Input Args: | ||
675 | * vcpuid - The id of the single VCPU to add to the VM. | ||
676 | * guest_code - The vCPU's entry point | ||
677 | * | ||
678 | * Output Args: None | ||
679 | * | ||
680 | * Return: | ||
681 | * Pointer to opaque structure that describes the created VM. | ||
682 | */ | ||
683 | struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code) | ||
684 | { | ||
685 | struct kvm_vm *vm; | ||
686 | |||
687 | /* Create VM */ | ||
688 | vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR); | ||
689 | |||
690 | /* Setup IRQ Chip */ | ||
691 | vm_create_irqchip(vm); | ||
692 | |||
693 | /* Add the first vCPU. */ | ||
694 | vm_vcpu_add_default(vm, vcpuid, guest_code); | ||
695 | |||
696 | return vm; | ||
697 | } | ||
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c new file mode 100644 index 000000000000..090fd3f19352 --- /dev/null +++ b/tools/testing/selftests/kvm/set_sregs_test.c | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * KVM_SET_SREGS tests | ||
3 | * | ||
4 | * Copyright (C) 2018, Google LLC. | ||
5 | * | ||
6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
7 | * | ||
8 | * This is a regression test for the bug fixed by the following commit: | ||
9 | * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") | ||
10 | * | ||
11 | * That bug allowed a user-mode program that called the KVM_SET_SREGS | ||
12 | * ioctl to put a VCPU's local APIC into an invalid state. | ||
13 | * | ||
14 | */ | ||
15 | #define _GNU_SOURCE /* for program_invocation_short_name */ | ||
16 | #include <fcntl.h> | ||
17 | #include <stdio.h> | ||
18 | #include <stdlib.h> | ||
19 | #include <string.h> | ||
20 | #include <sys/ioctl.h> | ||
21 | |||
22 | #include "test_util.h" | ||
23 | |||
24 | #include "kvm_util.h" | ||
25 | #include "x86.h" | ||
26 | |||
27 | #define VCPU_ID 5 | ||
28 | |||
29 | int main(int argc, char *argv[]) | ||
30 | { | ||
31 | struct kvm_sregs sregs; | ||
32 | struct kvm_vm *vm; | ||
33 | int rc; | ||
34 | |||
35 | /* Tell stdout not to buffer its content */ | ||
36 | setbuf(stdout, NULL); | ||
37 | |||
38 | /* Create VM */ | ||
39 | vm = vm_create_default(VCPU_ID, NULL); | ||
40 | |||
41 | vcpu_sregs_get(vm, VCPU_ID, &sregs); | ||
42 | sregs.apic_base = 1 << 10; | ||
43 | rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); | ||
44 | TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", | ||
45 | sregs.apic_base); | ||
46 | sregs.apic_base = 1 << 11; | ||
47 | rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); | ||
48 | TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", | ||
49 | sregs.apic_base); | ||
50 | |||
51 | kvm_vm_free(vm); | ||
52 | |||
53 | return 0; | ||
54 | } | ||