author     Paolo Bonzini <pbonzini@redhat.com>  2018-03-27 05:49:19 -0400
committer  Paolo Bonzini <pbonzini@redhat.com>  2018-04-04 13:11:00 -0400
commit     783e9e51266ebb7f78c606a53cb0fa41bb7c31a0 (patch)
tree       f0439ce2ef96051b8eb055f5696eb0e809037f23
parent     3140c156e919b0f5fad5c5f6cf7876c39d1d4f06 (diff)
kvm: selftests: add API testing infrastructure
Testsuite contributed by Google and cleaned up by myself for inclusion in Linux.

Signed-off-by: Ken Hofsass <hofsass@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--  tools/testing/selftests/Makefile                        1
-rw-r--r--  tools/testing/selftests/kvm/Makefile                   38
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util.h        139
-rw-r--r--  tools/testing/selftests/kvm/include/sparsebit.h        75
-rw-r--r--  tools/testing/selftests/kvm/include/test_util.h        45
-rw-r--r--  tools/testing/selftests/kvm/include/x86.h            1043
-rw-r--r--  tools/testing/selftests/kvm/lib/assert.c               87
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c           1480
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util_internal.h    67
-rw-r--r--  tools/testing/selftests/kvm/lib/sparsebit.c          2087
-rw-r--r--  tools/testing/selftests/kvm/lib/x86.c                 697
-rw-r--r--  tools/testing/selftests/kvm/set_sregs_test.c           54
12 files changed, 5813 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 7442dfb73b7f..c98f1b874582 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -14,6 +14,7 @@ TARGETS += gpio
 TARGETS += intel_pstate
 TARGETS += ipc
 TARGETS += kcmp
+TARGETS += kvm
 TARGETS += lib
 TARGETS += membarrier
 TARGETS += memfd
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000000..6aade26e9ca2
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,38 @@
1all:
2
3top_srcdir = ../../../../
4UNAME_M := $(shell uname -m)
5
6LIBKVM = lib/assert.c lib/kvm_util.c lib/sparsebit.c
7LIBKVM_x86_64 = lib/x86.c
8
9TEST_GEN_PROGS_x86_64 = set_sregs_test
10
11TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
12LIBKVM += $(LIBKVM_$(UNAME_M))
13
14INSTALL_HDR_PATH = $(top_srcdir)/usr
15LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
16CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
17
18# After inclusion, $(OUTPUT) is defined and
19# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
20include ../lib.mk
21
22STATIC_LIBS := $(OUTPUT)/libkvm.a
23LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
24EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
25
26x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
27$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
28 $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
29
30$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
31 $(AR) crs $@ $^
32
33$(LINUX_HDR_PATH):
34 make -C $(top_srcdir) headers_install
35
36all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
37$(TEST_GEN_PROGS): $(STATIC_LIBS)
38$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000000..d2e3e23bfbd3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,139 @@
1/*
2 * tools/testing/selftests/kvm/include/kvm_util.h
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 */
9#ifndef SELFTEST_KVM_UTIL_H
10#define SELFTEST_KVM_UTIL_H 1
11
12#include "test_util.h"
13
14#include "asm/kvm.h"
15#include "linux/kvm.h"
16#include <sys/ioctl.h>
17
18#include "sparsebit.h"
19
20/*
 21 * Memslots can't cover the gfn starting at this gpa; otherwise vCPUs can't be
22 * created. Only applies to VMs using EPT.
23 */
24#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
25
26
27/* Callers of kvm_util only have an incomplete/opaque description of the
28 * structure kvm_util is using to maintain the state of a VM.
29 */
30struct kvm_vm;
31
32typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
33typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
34
35/* Minimum allocated guest virtual and physical addresses */
36#define KVM_UTIL_MIN_VADDR 0x2000
37
38#define DEFAULT_GUEST_PHY_PAGES 512
39#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
40#define DEFAULT_STACK_PGS 5
41
42enum vm_guest_mode {
43 VM_MODE_FLAT48PG,
44};
45
46enum vm_mem_backing_src_type {
47 VM_MEM_SRC_ANONYMOUS,
48 VM_MEM_SRC_ANONYMOUS_THP,
49 VM_MEM_SRC_ANONYMOUS_HUGETLB,
50};
51
52int kvm_check_cap(long cap);
53
54struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
55void kvm_vm_free(struct kvm_vm *vmp);
56
57int kvm_memcmp_hva_gva(void *hva,
58 struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
59
60void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
61void vcpu_dump(FILE *stream, struct kvm_vm *vm,
62 uint32_t vcpuid, uint8_t indent);
63
64void vm_create_irqchip(struct kvm_vm *vm);
65
66void vm_userspace_mem_region_add(struct kvm_vm *vm,
67 enum vm_mem_backing_src_type src_type,
68 uint64_t guest_paddr, uint32_t slot, uint64_t npages,
69 uint32_t flags);
70
71void vcpu_ioctl(struct kvm_vm *vm,
72 uint32_t vcpuid, unsigned long ioctl, void *arg);
73void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
74void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
75void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
76vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
77 uint32_t data_memslot, uint32_t pgd_memslot);
78void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
79void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
80vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
81vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
82
83struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
84void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
85int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
86void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
87 struct kvm_mp_state *mp_state);
88void vcpu_regs_get(struct kvm_vm *vm,
89 uint32_t vcpuid, struct kvm_regs *regs);
90void vcpu_regs_set(struct kvm_vm *vm,
91 uint32_t vcpuid, struct kvm_regs *regs);
92void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
93void vcpu_sregs_get(struct kvm_vm *vm,
94 uint32_t vcpuid, struct kvm_sregs *sregs);
95void vcpu_sregs_set(struct kvm_vm *vm,
96 uint32_t vcpuid, struct kvm_sregs *sregs);
97int _vcpu_sregs_set(struct kvm_vm *vm,
98 uint32_t vcpuid, struct kvm_sregs *sregs);
99void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
100 struct kvm_vcpu_events *events);
101void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
102 struct kvm_vcpu_events *events);
103
104const char *exit_reason_str(unsigned int exit_reason);
105
106void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
107void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
108 uint32_t pgd_memslot);
109vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
110 vm_paddr_t paddr_min, uint32_t memslot);
111
112void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid);
113void vcpu_set_cpuid(
114 struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
115
116struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
117struct kvm_cpuid_entry2 *
118find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
119 uint32_t index);
120
121static inline struct kvm_cpuid_entry2 *
122find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function)
123{
124 return find_cpuid_index_entry(cpuid, function, 0);
125}
126
127struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
128void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
129
130struct kvm_userspace_memory_region *
131kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
132 uint64_t end);
133
134struct kvm_dirty_log *
135allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
136
137int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
138
139#endif /* SELFTEST_KVM_UTIL_H */
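
For orientation, here is a hedged sketch of a small test driven through the API declared above, loosely modeled on the set_sregs_test added by this patch; the VCPU_ID value and the invalid APIC base are illustrative assumptions, not part of the patch.

#include "kvm_util.h"

#define VCPU_ID 5	/* arbitrary vCPU id for this sketch */

static void guest_code(void)
{
	for (;;)
		;	/* the guest is never actually run in this sketch */
}

int main(void)
{
	struct kvm_vm *vm;
	struct kvm_sregs sregs;
	int rc;

	/* Create a VM with one vCPU running guest_code. */
	vm = vm_create_default(VCPU_ID, guest_code);

	/* Read the vCPU's special registers, corrupt one, and expect rejection. */
	vcpu_sregs_get(vm, VCPU_ID, &sregs);
	sregs.apic_base = 1 << 10;	/* deliberately mis-aligned APIC base */
	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
	TEST_ASSERT(rc != 0, "Setting an invalid APIC base should fail");

	kvm_vm_free(vm);
	return 0;
}
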
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000000..54cfeb6568d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
1/*
2 * tools/testing/selftests/kvm/include/sparsebit.h
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 *
9 * Header file that describes API to the sparsebit library.
10 * This library provides a memory efficient means of storing
11 * the settings of bits indexed via a uint64_t. Memory usage
12 * is reasonable, significantly less than (2^64 / 8) bytes, as
13 * long as bits that are mostly set or mostly cleared are close
14 * to each other. This library is efficient in memory usage
15 * even in the case where most bits are set.
16 */
17
18#ifndef _TEST_SPARSEBIT_H_
19#define _TEST_SPARSEBIT_H_
20
21#include <stdbool.h>
22#include <stdint.h>
23#include <stdio.h>
24
25#ifdef __cplusplus
26extern "C" {
27#endif
28
29struct sparsebit;
30typedef uint64_t sparsebit_idx_t;
31typedef uint64_t sparsebit_num_t;
32
33struct sparsebit *sparsebit_alloc(void);
34void sparsebit_free(struct sparsebit **sbitp);
35void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
36
37bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
38bool sparsebit_is_set_num(struct sparsebit *sbit,
39 sparsebit_idx_t idx, sparsebit_num_t num);
40bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
41bool sparsebit_is_clear_num(struct sparsebit *sbit,
42 sparsebit_idx_t idx, sparsebit_num_t num);
43sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
44bool sparsebit_any_set(struct sparsebit *sbit);
45bool sparsebit_any_clear(struct sparsebit *sbit);
46bool sparsebit_all_set(struct sparsebit *sbit);
47bool sparsebit_all_clear(struct sparsebit *sbit);
48sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
49sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
50sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
51sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
52sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
53 sparsebit_idx_t start, sparsebit_num_t num);
54sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
55 sparsebit_idx_t start, sparsebit_num_t num);
56
57void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
58void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
59 sparsebit_num_t num);
60void sparsebit_set_all(struct sparsebit *sbitp);
61
62void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
63void sparsebit_clear_num(struct sparsebit *sbitp,
64 sparsebit_idx_t start, sparsebit_num_t num);
65void sparsebit_clear_all(struct sparsebit *sbitp);
66
67void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
68 unsigned int indent);
69void sparsebit_validate_internal(struct sparsebit *sbit);
70
71#ifdef __cplusplus
72}
73#endif
74
75#endif /* _TEST_SPARSEBIT_H_ */
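
A hedged sketch of typical sparsebit usage (a standalone example, not part of the patch): track a small set of page indices and walk the set bits.

#include <inttypes.h>
#include <stdio.h>
#include "sparsebit.h"

static void sparsebit_example(void)
{
	struct sparsebit *s = sparsebit_alloc();
	sparsebit_idx_t idx;

	sparsebit_set_num(s, 0x100, 16);	/* mark 16 consecutive indices */
	sparsebit_clear(s, 0x105);		/* punch a hole in the range */

	/*
	 * Walk the set bits.  This loop assumes index 0 is never set and
	 * that sparsebit_next_set() returns 0 once no further bit is set.
	 */
	for (idx = sparsebit_first_set(s); idx; idx = sparsebit_next_set(s, idx))
		printf("index 0x%" PRIx64 " is set\n", idx);

	sparsebit_free(&s);
}
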
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000000..7ab98e41324f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,45 @@
1/*
2 * tools/testing/selftests/kvm/include/test_util.h
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 */
9
10#ifndef TEST_UTIL_H
11#define TEST_UTIL_H 1
12
13#include <stdlib.h>
14#include <stdarg.h>
15#include <stdbool.h>
16#include <stdio.h>
17#include <string.h>
18#include <inttypes.h>
19#include <errno.h>
20#include <unistd.h>
21#include <fcntl.h>
22
23ssize_t test_write(int fd, const void *buf, size_t count);
24ssize_t test_read(int fd, void *buf, size_t count);
25int test_seq_read(const char *path, char **bufp, size_t *sizep);
26
27void test_assert(bool exp, const char *exp_str,
28 const char *file, unsigned int line, const char *fmt, ...);
29
30#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
31
32#define TEST_ASSERT(e, fmt, ...) \
33 test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
34
35#define ASSERT_EQ(a, b) do { \
36 typeof(a) __a = (a); \
37 typeof(b) __b = (b); \
38 TEST_ASSERT(__a == __b, \
39 "ASSERT_EQ(%s, %s) failed.\n" \
40 "\t%s is %#lx\n" \
41 "\t%s is %#lx", \
42 #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
43} while (0)
44
45#endif /* TEST_UTIL_H */
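
A brief sketch of how the assertion macros above are meant to be used in a test; the helper name and values are illustrative only.

#include "test_util.h"

static void check_write(int fd, const void *buf, size_t len)
{
	ssize_t nr = test_write(fd, buf, len);

	TEST_ASSERT(nr >= 0, "write failed, rc: %zd errno: %i", nr, errno);
	ASSERT_EQ((size_t)nr, len);
}
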
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000000..4a5b2c4c1a0f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1043 @@
1/*
2 * tools/testing/selftests/kvm/include/x86.h
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 */
9
10#ifndef SELFTEST_KVM_X86_H
11#define SELFTEST_KVM_X86_H
12
13#include <assert.h>
14#include <stdint.h>
15
16#define X86_EFLAGS_FIXED (1u << 1)
17
18#define X86_CR4_VME (1ul << 0)
19#define X86_CR4_PVI (1ul << 1)
20#define X86_CR4_TSD (1ul << 2)
21#define X86_CR4_DE (1ul << 3)
22#define X86_CR4_PSE (1ul << 4)
23#define X86_CR4_PAE (1ul << 5)
24#define X86_CR4_MCE (1ul << 6)
25#define X86_CR4_PGE (1ul << 7)
26#define X86_CR4_PCE (1ul << 8)
27#define X86_CR4_OSFXSR (1ul << 9)
28#define X86_CR4_OSXMMEXCPT (1ul << 10)
29#define X86_CR4_UMIP (1ul << 11)
30#define X86_CR4_VMXE (1ul << 13)
31#define X86_CR4_SMXE (1ul << 14)
32#define X86_CR4_FSGSBASE (1ul << 16)
33#define X86_CR4_PCIDE (1ul << 17)
34#define X86_CR4_OSXSAVE (1ul << 18)
35#define X86_CR4_SMEP (1ul << 20)
36#define X86_CR4_SMAP (1ul << 21)
37#define X86_CR4_PKE (1ul << 22)
38
 39/* The enum values match the instruction encoding of each register */
40enum x86_register {
41 RAX = 0,
42 RCX,
43 RDX,
44 RBX,
45 RSP,
46 RBP,
47 RSI,
48 RDI,
49 R8,
50 R9,
51 R10,
52 R11,
53 R12,
54 R13,
55 R14,
56 R15,
57};
58
59struct desc64 {
60 uint16_t limit0;
61 uint16_t base0;
62 unsigned base1:8, type:5, dpl:2, p:1;
63 unsigned limit1:4, zero0:3, g:1, base2:8;
64 uint32_t base3;
65 uint32_t zero1;
66} __attribute__((packed));
67
68struct desc_ptr {
69 uint16_t size;
70 uint64_t address;
71} __attribute__((packed));
72
73static inline uint64_t get_desc64_base(const struct desc64 *desc)
74{
75 return ((uint64_t)desc->base3 << 32) |
76 (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
77}
78
79static inline uint64_t rdtsc(void)
80{
81 uint32_t eax, edx;
82
83 /*
84 * The lfence is to wait (on Intel CPUs) until all previous
85 * instructions have been executed.
86 */
87 __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
88 return ((uint64_t)edx) << 32 | eax;
89}
90
91static inline uint64_t rdtscp(uint32_t *aux)
92{
93 uint32_t eax, edx;
94
95 __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
96 return ((uint64_t)edx) << 32 | eax;
97}
98
99static inline uint64_t rdmsr(uint32_t msr)
100{
101 uint32_t a, d;
102
103 __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
104
105 return a | ((uint64_t) d << 32);
106}
107
108static inline void wrmsr(uint32_t msr, uint64_t value)
109{
110 uint32_t a = value;
111 uint32_t d = value >> 32;
112
113 __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
114}
115
116
117static inline uint16_t inw(uint16_t port)
118{
119 uint16_t tmp;
120
121 __asm__ __volatile__("in %%dx, %%ax"
122 : /* output */ "=a" (tmp)
123 : /* input */ "d" (port));
124
125 return tmp;
126}
127
128static inline uint16_t get_es(void)
129{
130 uint16_t es;
131
132 __asm__ __volatile__("mov %%es, %[es]"
133 : /* output */ [es]"=rm"(es));
134 return es;
135}
136
137static inline uint16_t get_cs(void)
138{
139 uint16_t cs;
140
141 __asm__ __volatile__("mov %%cs, %[cs]"
142 : /* output */ [cs]"=rm"(cs));
143 return cs;
144}
145
146static inline uint16_t get_ss(void)
147{
148 uint16_t ss;
149
150 __asm__ __volatile__("mov %%ss, %[ss]"
151 : /* output */ [ss]"=rm"(ss));
152 return ss;
153}
154
155static inline uint16_t get_ds(void)
156{
157 uint16_t ds;
158
159 __asm__ __volatile__("mov %%ds, %[ds]"
160 : /* output */ [ds]"=rm"(ds));
161 return ds;
162}
163
164static inline uint16_t get_fs(void)
165{
166 uint16_t fs;
167
168 __asm__ __volatile__("mov %%fs, %[fs]"
169 : /* output */ [fs]"=rm"(fs));
170 return fs;
171}
172
173static inline uint16_t get_gs(void)
174{
175 uint16_t gs;
176
177 __asm__ __volatile__("mov %%gs, %[gs]"
178 : /* output */ [gs]"=rm"(gs));
179 return gs;
180}
181
182static inline uint16_t get_tr(void)
183{
184 uint16_t tr;
185
186 __asm__ __volatile__("str %[tr]"
187 : /* output */ [tr]"=rm"(tr));
188 return tr;
189}
190
191static inline uint64_t get_cr0(void)
192{
193 uint64_t cr0;
194
195 __asm__ __volatile__("mov %%cr0, %[cr0]"
196 : /* output */ [cr0]"=r"(cr0));
197 return cr0;
198}
199
200static inline uint64_t get_cr3(void)
201{
202 uint64_t cr3;
203
204 __asm__ __volatile__("mov %%cr3, %[cr3]"
205 : /* output */ [cr3]"=r"(cr3));
206 return cr3;
207}
208
209static inline uint64_t get_cr4(void)
210{
211 uint64_t cr4;
212
213 __asm__ __volatile__("mov %%cr4, %[cr4]"
214 : /* output */ [cr4]"=r"(cr4));
215 return cr4;
216}
217
218static inline void set_cr4(uint64_t val)
219{
220 __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
221}
222
223static inline uint64_t get_gdt_base(void)
224{
225 struct desc_ptr gdt;
226 __asm__ __volatile__("sgdt %[gdt]"
227 : /* output */ [gdt]"=m"(gdt));
228 return gdt.address;
229}
230
231static inline uint64_t get_idt_base(void)
232{
233 struct desc_ptr idt;
234 __asm__ __volatile__("sidt %[idt]"
235 : /* output */ [idt]"=m"(idt));
236 return idt.address;
237}
238
239#define SET_XMM(__var, __xmm) \
240 asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
241
242static inline void set_xmm(int n, unsigned long val)
243{
244 switch (n) {
245 case 0:
246 SET_XMM(val, xmm0);
247 break;
248 case 1:
249 SET_XMM(val, xmm1);
250 break;
251 case 2:
252 SET_XMM(val, xmm2);
253 break;
254 case 3:
255 SET_XMM(val, xmm3);
256 break;
257 case 4:
258 SET_XMM(val, xmm4);
259 break;
260 case 5:
261 SET_XMM(val, xmm5);
262 break;
263 case 6:
264 SET_XMM(val, xmm6);
265 break;
266 case 7:
267 SET_XMM(val, xmm7);
268 break;
269 }
270}
271
272typedef unsigned long v1di __attribute__ ((vector_size (8)));
273static inline unsigned long get_xmm(int n)
274{
275 assert(n >= 0 && n <= 7);
276
277 register v1di xmm0 __asm__("%xmm0");
278 register v1di xmm1 __asm__("%xmm1");
279 register v1di xmm2 __asm__("%xmm2");
280 register v1di xmm3 __asm__("%xmm3");
281 register v1di xmm4 __asm__("%xmm4");
282 register v1di xmm5 __asm__("%xmm5");
283 register v1di xmm6 __asm__("%xmm6");
284 register v1di xmm7 __asm__("%xmm7");
285 switch (n) {
286 case 0:
287 return (unsigned long)xmm0;
288 case 1:
289 return (unsigned long)xmm1;
290 case 2:
291 return (unsigned long)xmm2;
292 case 3:
293 return (unsigned long)xmm3;
294 case 4:
295 return (unsigned long)xmm4;
296 case 5:
297 return (unsigned long)xmm5;
298 case 6:
299 return (unsigned long)xmm6;
300 case 7:
301 return (unsigned long)xmm7;
302 }
303 return 0;
304}
305
306/*
307 * Basic CPU control in CR0
308 */
309#define X86_CR0_PE (1UL<<0) /* Protection Enable */
310#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
311#define X86_CR0_EM (1UL<<2) /* Emulation */
312#define X86_CR0_TS (1UL<<3) /* Task Switched */
313#define X86_CR0_ET (1UL<<4) /* Extension Type */
314#define X86_CR0_NE (1UL<<5) /* Numeric Error */
315#define X86_CR0_WP (1UL<<16) /* Write Protect */
316#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
317#define X86_CR0_NW (1UL<<29) /* Not Write-through */
318#define X86_CR0_CD (1UL<<30) /* Cache Disable */
319#define X86_CR0_PG (1UL<<31) /* Paging */
320
321/*
322 * CPU model specific register (MSR) numbers.
323 */
324
325/* x86-64 specific MSRs */
326#define MSR_EFER 0xc0000080 /* extended feature register */
327#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
328#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
329#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
330#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
331#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
332#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
333#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
334#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
335
336/* EFER bits: */
337#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
338#define EFER_LME (1<<8) /* Long mode enable */
339#define EFER_LMA (1<<10) /* Long mode active (read-only) */
340#define EFER_NX (1<<11) /* No execute enable */
341#define EFER_SVME (1<<12) /* Enable virtualization */
342#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
343#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
344
345/* Intel MSRs. Some also available on other CPUs */
346
347#define MSR_PPIN_CTL 0x0000004e
348#define MSR_PPIN 0x0000004f
349
350#define MSR_IA32_PERFCTR0 0x000000c1
351#define MSR_IA32_PERFCTR1 0x000000c2
352#define MSR_FSB_FREQ 0x000000cd
353#define MSR_PLATFORM_INFO 0x000000ce
354#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
355#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
356
357#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
358#define NHM_C3_AUTO_DEMOTE (1UL << 25)
359#define NHM_C1_AUTO_DEMOTE (1UL << 26)
360#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
361#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
362#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
363
364#define MSR_MTRRcap 0x000000fe
365#define MSR_IA32_BBL_CR_CTL 0x00000119
366#define MSR_IA32_BBL_CR_CTL3 0x0000011e
367
368#define MSR_IA32_SYSENTER_CS 0x00000174
369#define MSR_IA32_SYSENTER_ESP 0x00000175
370#define MSR_IA32_SYSENTER_EIP 0x00000176
371
372#define MSR_IA32_MCG_CAP 0x00000179
373#define MSR_IA32_MCG_STATUS 0x0000017a
374#define MSR_IA32_MCG_CTL 0x0000017b
375#define MSR_IA32_MCG_EXT_CTL 0x000004d0
376
377#define MSR_OFFCORE_RSP_0 0x000001a6
378#define MSR_OFFCORE_RSP_1 0x000001a7
379#define MSR_TURBO_RATIO_LIMIT 0x000001ad
380#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
381#define MSR_TURBO_RATIO_LIMIT2 0x000001af
382
383#define MSR_LBR_SELECT 0x000001c8
384#define MSR_LBR_TOS 0x000001c9
385#define MSR_LBR_NHM_FROM 0x00000680
386#define MSR_LBR_NHM_TO 0x000006c0
387#define MSR_LBR_CORE_FROM 0x00000040
388#define MSR_LBR_CORE_TO 0x00000060
389
390#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
391#define LBR_INFO_MISPRED BIT_ULL(63)
392#define LBR_INFO_IN_TX BIT_ULL(62)
393#define LBR_INFO_ABORT BIT_ULL(61)
394#define LBR_INFO_CYCLES 0xffff
395
396#define MSR_IA32_PEBS_ENABLE 0x000003f1
397#define MSR_IA32_DS_AREA 0x00000600
398#define MSR_IA32_PERF_CAPABILITIES 0x00000345
399#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
400
401#define MSR_IA32_RTIT_CTL 0x00000570
402#define MSR_IA32_RTIT_STATUS 0x00000571
403#define MSR_IA32_RTIT_ADDR0_A 0x00000580
404#define MSR_IA32_RTIT_ADDR0_B 0x00000581
405#define MSR_IA32_RTIT_ADDR1_A 0x00000582
406#define MSR_IA32_RTIT_ADDR1_B 0x00000583
407#define MSR_IA32_RTIT_ADDR2_A 0x00000584
408#define MSR_IA32_RTIT_ADDR2_B 0x00000585
409#define MSR_IA32_RTIT_ADDR3_A 0x00000586
410#define MSR_IA32_RTIT_ADDR3_B 0x00000587
411#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
412#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
413#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
414
415#define MSR_MTRRfix64K_00000 0x00000250
416#define MSR_MTRRfix16K_80000 0x00000258
417#define MSR_MTRRfix16K_A0000 0x00000259
418#define MSR_MTRRfix4K_C0000 0x00000268
419#define MSR_MTRRfix4K_C8000 0x00000269
420#define MSR_MTRRfix4K_D0000 0x0000026a
421#define MSR_MTRRfix4K_D8000 0x0000026b
422#define MSR_MTRRfix4K_E0000 0x0000026c
423#define MSR_MTRRfix4K_E8000 0x0000026d
424#define MSR_MTRRfix4K_F0000 0x0000026e
425#define MSR_MTRRfix4K_F8000 0x0000026f
426#define MSR_MTRRdefType 0x000002ff
427
428#define MSR_IA32_CR_PAT 0x00000277
429
430#define MSR_IA32_DEBUGCTLMSR 0x000001d9
431#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
432#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
433#define MSR_IA32_LASTINTFROMIP 0x000001dd
434#define MSR_IA32_LASTINTTOIP 0x000001de
435
436/* DEBUGCTLMSR bits (others vary by model): */
437#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
438#define DEBUGCTLMSR_BTF_SHIFT 1
439#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
440#define DEBUGCTLMSR_TR (1UL << 6)
441#define DEBUGCTLMSR_BTS (1UL << 7)
442#define DEBUGCTLMSR_BTINT (1UL << 8)
443#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
444#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
445#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
446#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
447#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
448
449#define MSR_PEBS_FRONTEND 0x000003f7
450
451#define MSR_IA32_POWER_CTL 0x000001fc
452
453#define MSR_IA32_MC0_CTL 0x00000400
454#define MSR_IA32_MC0_STATUS 0x00000401
455#define MSR_IA32_MC0_ADDR 0x00000402
456#define MSR_IA32_MC0_MISC 0x00000403
457
458/* C-state Residency Counters */
459#define MSR_PKG_C3_RESIDENCY 0x000003f8
460#define MSR_PKG_C6_RESIDENCY 0x000003f9
461#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
462#define MSR_PKG_C7_RESIDENCY 0x000003fa
463#define MSR_CORE_C3_RESIDENCY 0x000003fc
464#define MSR_CORE_C6_RESIDENCY 0x000003fd
465#define MSR_CORE_C7_RESIDENCY 0x000003fe
466#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
467#define MSR_PKG_C2_RESIDENCY 0x0000060d
468#define MSR_PKG_C8_RESIDENCY 0x00000630
469#define MSR_PKG_C9_RESIDENCY 0x00000631
470#define MSR_PKG_C10_RESIDENCY 0x00000632
471
472/* Interrupt Response Limit */
473#define MSR_PKGC3_IRTL 0x0000060a
474#define MSR_PKGC6_IRTL 0x0000060b
475#define MSR_PKGC7_IRTL 0x0000060c
476#define MSR_PKGC8_IRTL 0x00000633
477#define MSR_PKGC9_IRTL 0x00000634
478#define MSR_PKGC10_IRTL 0x00000635
479
480/* Run Time Average Power Limiting (RAPL) Interface */
481
482#define MSR_RAPL_POWER_UNIT 0x00000606
483
484#define MSR_PKG_POWER_LIMIT 0x00000610
485#define MSR_PKG_ENERGY_STATUS 0x00000611
486#define MSR_PKG_PERF_STATUS 0x00000613
487#define MSR_PKG_POWER_INFO 0x00000614
488
489#define MSR_DRAM_POWER_LIMIT 0x00000618
490#define MSR_DRAM_ENERGY_STATUS 0x00000619
491#define MSR_DRAM_PERF_STATUS 0x0000061b
492#define MSR_DRAM_POWER_INFO 0x0000061c
493
494#define MSR_PP0_POWER_LIMIT 0x00000638
495#define MSR_PP0_ENERGY_STATUS 0x00000639
496#define MSR_PP0_POLICY 0x0000063a
497#define MSR_PP0_PERF_STATUS 0x0000063b
498
499#define MSR_PP1_POWER_LIMIT 0x00000640
500#define MSR_PP1_ENERGY_STATUS 0x00000641
501#define MSR_PP1_POLICY 0x00000642
502
503/* Config TDP MSRs */
504#define MSR_CONFIG_TDP_NOMINAL 0x00000648
505#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
506#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
507#define MSR_CONFIG_TDP_CONTROL 0x0000064B
508#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
509
510#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
511
512#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
513#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
514#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
515#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
516
517#define MSR_CORE_C1_RES 0x00000660
518#define MSR_MODULE_C6_RES_MS 0x00000664
519
520#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
521#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
522
523#define MSR_ATOM_CORE_RATIOS 0x0000066a
524#define MSR_ATOM_CORE_VIDS 0x0000066b
525#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
526#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
527
528
529#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
530#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
531#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
532
533/* Hardware P state interface */
534#define MSR_PPERF 0x0000064e
535#define MSR_PERF_LIMIT_REASONS 0x0000064f
536#define MSR_PM_ENABLE 0x00000770
537#define MSR_HWP_CAPABILITIES 0x00000771
538#define MSR_HWP_REQUEST_PKG 0x00000772
539#define MSR_HWP_INTERRUPT 0x00000773
540#define MSR_HWP_REQUEST 0x00000774
541#define MSR_HWP_STATUS 0x00000777
542
543/* CPUID.6.EAX */
544#define HWP_BASE_BIT (1<<7)
545#define HWP_NOTIFICATIONS_BIT (1<<8)
546#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
547#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
548#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
549
550/* IA32_HWP_CAPABILITIES */
551#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
552#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
553#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
554#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
555
556/* IA32_HWP_REQUEST */
557#define HWP_MIN_PERF(x) (x & 0xff)
558#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
559#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
560#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
561#define HWP_EPP_PERFORMANCE 0x00
562#define HWP_EPP_BALANCE_PERFORMANCE 0x80
563#define HWP_EPP_BALANCE_POWERSAVE 0xC0
564#define HWP_EPP_POWERSAVE 0xFF
565#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
566#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
567
568/* IA32_HWP_STATUS */
569#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
570#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
571
572/* IA32_HWP_INTERRUPT */
573#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
574#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
575
576#define MSR_AMD64_MC0_MASK 0xc0010044
577
578#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
579#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
580#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
581#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
582
583#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
584
 585/* These are consecutive and not in the normal block of four MCE bank registers */
586#define MSR_IA32_MC0_CTL2 0x00000280
587#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
588
589#define MSR_P6_PERFCTR0 0x000000c1
590#define MSR_P6_PERFCTR1 0x000000c2
591#define MSR_P6_EVNTSEL0 0x00000186
592#define MSR_P6_EVNTSEL1 0x00000187
593
594#define MSR_KNC_PERFCTR0 0x00000020
595#define MSR_KNC_PERFCTR1 0x00000021
596#define MSR_KNC_EVNTSEL0 0x00000028
597#define MSR_KNC_EVNTSEL1 0x00000029
598
599/* Alternative perfctr range with full access. */
600#define MSR_IA32_PMC0 0x000004c1
601
602/* AMD64 MSRs. Not complete. See the architecture manual for a more
603 complete list. */
604
605#define MSR_AMD64_PATCH_LEVEL 0x0000008b
606#define MSR_AMD64_TSC_RATIO 0xc0000104
607#define MSR_AMD64_NB_CFG 0xc001001f
608#define MSR_AMD64_PATCH_LOADER 0xc0010020
609#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
610#define MSR_AMD64_OSVW_STATUS 0xc0010141
611#define MSR_AMD64_LS_CFG 0xc0011020
612#define MSR_AMD64_DC_CFG 0xc0011022
613#define MSR_AMD64_BU_CFG2 0xc001102a
614#define MSR_AMD64_IBSFETCHCTL 0xc0011030
615#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
616#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
617#define MSR_AMD64_IBSFETCH_REG_COUNT 3
618#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
619#define MSR_AMD64_IBSOPCTL 0xc0011033
620#define MSR_AMD64_IBSOPRIP 0xc0011034
621#define MSR_AMD64_IBSOPDATA 0xc0011035
622#define MSR_AMD64_IBSOPDATA2 0xc0011036
623#define MSR_AMD64_IBSOPDATA3 0xc0011037
624#define MSR_AMD64_IBSDCLINAD 0xc0011038
625#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
626#define MSR_AMD64_IBSOP_REG_COUNT 7
627#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
628#define MSR_AMD64_IBSCTL 0xc001103a
629#define MSR_AMD64_IBSBRTARGET 0xc001103b
630#define MSR_AMD64_IBSOPDATA4 0xc001103d
631#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
632#define MSR_AMD64_SEV 0xc0010131
633#define MSR_AMD64_SEV_ENABLED_BIT 0
634#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
635
636/* Fam 17h MSRs */
637#define MSR_F17H_IRPERF 0xc00000e9
638
639/* Fam 16h MSRs */
640#define MSR_F16H_L2I_PERF_CTL 0xc0010230
641#define MSR_F16H_L2I_PERF_CTR 0xc0010231
642#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
643#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
644#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
645#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
646
647/* Fam 15h MSRs */
648#define MSR_F15H_PERF_CTL 0xc0010200
649#define MSR_F15H_PERF_CTR 0xc0010201
650#define MSR_F15H_NB_PERF_CTL 0xc0010240
651#define MSR_F15H_NB_PERF_CTR 0xc0010241
652#define MSR_F15H_PTSC 0xc0010280
653#define MSR_F15H_IC_CFG 0xc0011021
654
655/* Fam 10h MSRs */
656#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
657#define FAM10H_MMIO_CONF_ENABLE (1<<0)
658#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
659#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
660#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
661#define FAM10H_MMIO_CONF_BASE_SHIFT 20
662#define MSR_FAM10H_NODE_ID 0xc001100c
663#define MSR_F10H_DECFG 0xc0011029
664#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
665#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
666
667/* K8 MSRs */
668#define MSR_K8_TOP_MEM1 0xc001001a
669#define MSR_K8_TOP_MEM2 0xc001001d
670#define MSR_K8_SYSCFG 0xc0010010
671#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
672#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
673#define MSR_K8_INT_PENDING_MSG 0xc0010055
674/* C1E active bits in int pending message */
675#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
676#define MSR_K8_TSEG_ADDR 0xc0010112
677#define MSR_K8_TSEG_MASK 0xc0010113
678#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
679#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
680#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
681
682/* K7 MSRs */
683#define MSR_K7_EVNTSEL0 0xc0010000
684#define MSR_K7_PERFCTR0 0xc0010004
685#define MSR_K7_EVNTSEL1 0xc0010001
686#define MSR_K7_PERFCTR1 0xc0010005
687#define MSR_K7_EVNTSEL2 0xc0010002
688#define MSR_K7_PERFCTR2 0xc0010006
689#define MSR_K7_EVNTSEL3 0xc0010003
690#define MSR_K7_PERFCTR3 0xc0010007
691#define MSR_K7_CLK_CTL 0xc001001b
692#define MSR_K7_HWCR 0xc0010015
693#define MSR_K7_HWCR_SMMLOCK_BIT 0
694#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
695#define MSR_K7_FID_VID_CTL 0xc0010041
696#define MSR_K7_FID_VID_STATUS 0xc0010042
697
698/* K6 MSRs */
699#define MSR_K6_WHCR 0xc0000082
700#define MSR_K6_UWCCR 0xc0000085
701#define MSR_K6_EPMR 0xc0000086
702#define MSR_K6_PSOR 0xc0000087
703#define MSR_K6_PFIR 0xc0000088
704
705/* Centaur-Hauls/IDT defined MSRs. */
706#define MSR_IDT_FCR1 0x00000107
707#define MSR_IDT_FCR2 0x00000108
708#define MSR_IDT_FCR3 0x00000109
709#define MSR_IDT_FCR4 0x0000010a
710
711#define MSR_IDT_MCR0 0x00000110
712#define MSR_IDT_MCR1 0x00000111
713#define MSR_IDT_MCR2 0x00000112
714#define MSR_IDT_MCR3 0x00000113
715#define MSR_IDT_MCR4 0x00000114
716#define MSR_IDT_MCR5 0x00000115
717#define MSR_IDT_MCR6 0x00000116
718#define MSR_IDT_MCR7 0x00000117
719#define MSR_IDT_MCR_CTRL 0x00000120
720
721/* VIA Cyrix defined MSRs*/
722#define MSR_VIA_FCR 0x00001107
723#define MSR_VIA_LONGHAUL 0x0000110a
724#define MSR_VIA_RNG 0x0000110b
725#define MSR_VIA_BCR2 0x00001147
726
727/* Transmeta defined MSRs */
728#define MSR_TMTA_LONGRUN_CTRL 0x80868010
729#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
730#define MSR_TMTA_LRTI_READOUT 0x80868018
731#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
732
733/* Intel defined MSRs. */
734#define MSR_IA32_P5_MC_ADDR 0x00000000
735#define MSR_IA32_P5_MC_TYPE 0x00000001
736#define MSR_IA32_TSC 0x00000010
737#define MSR_IA32_PLATFORM_ID 0x00000017
738#define MSR_IA32_EBL_CR_POWERON 0x0000002a
739#define MSR_EBC_FREQUENCY_ID 0x0000002c
740#define MSR_SMI_COUNT 0x00000034
741#define MSR_IA32_FEATURE_CONTROL 0x0000003a
742#define MSR_IA32_TSC_ADJUST 0x0000003b
743#define MSR_IA32_BNDCFGS 0x00000d90
744
745#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
746
747#define MSR_IA32_XSS 0x00000da0
748
749#define FEATURE_CONTROL_LOCKED (1<<0)
750#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
751#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
752#define FEATURE_CONTROL_LMCE (1<<20)
753
754#define MSR_IA32_APICBASE 0x0000001b
755#define MSR_IA32_APICBASE_BSP (1<<8)
756#define MSR_IA32_APICBASE_ENABLE (1<<11)
757#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
758
759#define MSR_IA32_TSCDEADLINE 0x000006e0
760
761#define MSR_IA32_UCODE_WRITE 0x00000079
762#define MSR_IA32_UCODE_REV 0x0000008b
763
764#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
765#define MSR_IA32_SMBASE 0x0000009e
766
767#define MSR_IA32_PERF_STATUS 0x00000198
768#define MSR_IA32_PERF_CTL 0x00000199
769#define INTEL_PERF_CTL_MASK 0xffff
770#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
771#define MSR_AMD_PERF_STATUS 0xc0010063
772#define MSR_AMD_PERF_CTL 0xc0010062
773
774#define MSR_IA32_MPERF 0x000000e7
775#define MSR_IA32_APERF 0x000000e8
776
777#define MSR_IA32_THERM_CONTROL 0x0000019a
778#define MSR_IA32_THERM_INTERRUPT 0x0000019b
779
780#define THERM_INT_HIGH_ENABLE (1 << 0)
781#define THERM_INT_LOW_ENABLE (1 << 1)
782#define THERM_INT_PLN_ENABLE (1 << 24)
783
784#define MSR_IA32_THERM_STATUS 0x0000019c
785
786#define THERM_STATUS_PROCHOT (1 << 0)
787#define THERM_STATUS_POWER_LIMIT (1 << 10)
788
789#define MSR_THERM2_CTL 0x0000019d
790
791#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
792
793#define MSR_IA32_MISC_ENABLE 0x000001a0
794
795#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
796
797#define MSR_MISC_FEATURE_CONTROL 0x000001a4
798#define MSR_MISC_PWR_MGMT 0x000001aa
799
800#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
801#define ENERGY_PERF_BIAS_PERFORMANCE 0
802#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
803#define ENERGY_PERF_BIAS_NORMAL 6
804#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
805#define ENERGY_PERF_BIAS_POWERSAVE 15
806
807#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
808
809#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
810#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
811
812#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
813
814#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
815#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
816#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
817
818/* Thermal Thresholds Support */
819#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
820#define THERM_SHIFT_THRESHOLD0 8
821#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
822#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
823#define THERM_SHIFT_THRESHOLD1 16
824#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
825#define THERM_STATUS_THRESHOLD0 (1 << 6)
826#define THERM_LOG_THRESHOLD0 (1 << 7)
827#define THERM_STATUS_THRESHOLD1 (1 << 8)
828#define THERM_LOG_THRESHOLD1 (1 << 9)
829
830/* MISC_ENABLE bits: architectural */
831#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
832#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
833#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
834#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
835#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
836#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
837#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
838#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
839#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
840#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
841#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
842#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
843#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
844#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
845#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
846#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
847#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
848#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
849#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
850#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
851
852/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
853#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
854#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
855#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
856#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
857#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
858#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
859#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
860#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
861#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
862#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
863#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
864#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
865#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
866#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
867#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
868#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
869#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
870#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
871#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
872#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
873#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
874#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
875#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
876#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
877#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
878#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
879#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
880#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
881#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
882#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
883
884/* MISC_FEATURES_ENABLES non-architectural features */
885#define MSR_MISC_FEATURES_ENABLES 0x00000140
886
887#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
888#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
889#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
890
891#define MSR_IA32_TSC_DEADLINE 0x000006E0
892
893/* P4/Xeon+ specific */
894#define MSR_IA32_MCG_EAX 0x00000180
895#define MSR_IA32_MCG_EBX 0x00000181
896#define MSR_IA32_MCG_ECX 0x00000182
897#define MSR_IA32_MCG_EDX 0x00000183
898#define MSR_IA32_MCG_ESI 0x00000184
899#define MSR_IA32_MCG_EDI 0x00000185
900#define MSR_IA32_MCG_EBP 0x00000186
901#define MSR_IA32_MCG_ESP 0x00000187
902#define MSR_IA32_MCG_EFLAGS 0x00000188
903#define MSR_IA32_MCG_EIP 0x00000189
904#define MSR_IA32_MCG_RESERVED 0x0000018a
905
906/* Pentium IV performance counter MSRs */
907#define MSR_P4_BPU_PERFCTR0 0x00000300
908#define MSR_P4_BPU_PERFCTR1 0x00000301
909#define MSR_P4_BPU_PERFCTR2 0x00000302
910#define MSR_P4_BPU_PERFCTR3 0x00000303
911#define MSR_P4_MS_PERFCTR0 0x00000304
912#define MSR_P4_MS_PERFCTR1 0x00000305
913#define MSR_P4_MS_PERFCTR2 0x00000306
914#define MSR_P4_MS_PERFCTR3 0x00000307
915#define MSR_P4_FLAME_PERFCTR0 0x00000308
916#define MSR_P4_FLAME_PERFCTR1 0x00000309
917#define MSR_P4_FLAME_PERFCTR2 0x0000030a
918#define MSR_P4_FLAME_PERFCTR3 0x0000030b
919#define MSR_P4_IQ_PERFCTR0 0x0000030c
920#define MSR_P4_IQ_PERFCTR1 0x0000030d
921#define MSR_P4_IQ_PERFCTR2 0x0000030e
922#define MSR_P4_IQ_PERFCTR3 0x0000030f
923#define MSR_P4_IQ_PERFCTR4 0x00000310
924#define MSR_P4_IQ_PERFCTR5 0x00000311
925#define MSR_P4_BPU_CCCR0 0x00000360
926#define MSR_P4_BPU_CCCR1 0x00000361
927#define MSR_P4_BPU_CCCR2 0x00000362
928#define MSR_P4_BPU_CCCR3 0x00000363
929#define MSR_P4_MS_CCCR0 0x00000364
930#define MSR_P4_MS_CCCR1 0x00000365
931#define MSR_P4_MS_CCCR2 0x00000366
932#define MSR_P4_MS_CCCR3 0x00000367
933#define MSR_P4_FLAME_CCCR0 0x00000368
934#define MSR_P4_FLAME_CCCR1 0x00000369
935#define MSR_P4_FLAME_CCCR2 0x0000036a
936#define MSR_P4_FLAME_CCCR3 0x0000036b
937#define MSR_P4_IQ_CCCR0 0x0000036c
938#define MSR_P4_IQ_CCCR1 0x0000036d
939#define MSR_P4_IQ_CCCR2 0x0000036e
940#define MSR_P4_IQ_CCCR3 0x0000036f
941#define MSR_P4_IQ_CCCR4 0x00000370
942#define MSR_P4_IQ_CCCR5 0x00000371
943#define MSR_P4_ALF_ESCR0 0x000003ca
944#define MSR_P4_ALF_ESCR1 0x000003cb
945#define MSR_P4_BPU_ESCR0 0x000003b2
946#define MSR_P4_BPU_ESCR1 0x000003b3
947#define MSR_P4_BSU_ESCR0 0x000003a0
948#define MSR_P4_BSU_ESCR1 0x000003a1
949#define MSR_P4_CRU_ESCR0 0x000003b8
950#define MSR_P4_CRU_ESCR1 0x000003b9
951#define MSR_P4_CRU_ESCR2 0x000003cc
952#define MSR_P4_CRU_ESCR3 0x000003cd
953#define MSR_P4_CRU_ESCR4 0x000003e0
954#define MSR_P4_CRU_ESCR5 0x000003e1
955#define MSR_P4_DAC_ESCR0 0x000003a8
956#define MSR_P4_DAC_ESCR1 0x000003a9
957#define MSR_P4_FIRM_ESCR0 0x000003a4
958#define MSR_P4_FIRM_ESCR1 0x000003a5
959#define MSR_P4_FLAME_ESCR0 0x000003a6
960#define MSR_P4_FLAME_ESCR1 0x000003a7
961#define MSR_P4_FSB_ESCR0 0x000003a2
962#define MSR_P4_FSB_ESCR1 0x000003a3
963#define MSR_P4_IQ_ESCR0 0x000003ba
964#define MSR_P4_IQ_ESCR1 0x000003bb
965#define MSR_P4_IS_ESCR0 0x000003b4
966#define MSR_P4_IS_ESCR1 0x000003b5
967#define MSR_P4_ITLB_ESCR0 0x000003b6
968#define MSR_P4_ITLB_ESCR1 0x000003b7
969#define MSR_P4_IX_ESCR0 0x000003c8
970#define MSR_P4_IX_ESCR1 0x000003c9
971#define MSR_P4_MOB_ESCR0 0x000003aa
972#define MSR_P4_MOB_ESCR1 0x000003ab
973#define MSR_P4_MS_ESCR0 0x000003c0
974#define MSR_P4_MS_ESCR1 0x000003c1
975#define MSR_P4_PMH_ESCR0 0x000003ac
976#define MSR_P4_PMH_ESCR1 0x000003ad
977#define MSR_P4_RAT_ESCR0 0x000003bc
978#define MSR_P4_RAT_ESCR1 0x000003bd
979#define MSR_P4_SAAT_ESCR0 0x000003ae
980#define MSR_P4_SAAT_ESCR1 0x000003af
981#define MSR_P4_SSU_ESCR0 0x000003be
982#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
983
984#define MSR_P4_TBPU_ESCR0 0x000003c2
985#define MSR_P4_TBPU_ESCR1 0x000003c3
986#define MSR_P4_TC_ESCR0 0x000003c4
987#define MSR_P4_TC_ESCR1 0x000003c5
988#define MSR_P4_U2L_ESCR0 0x000003b0
989#define MSR_P4_U2L_ESCR1 0x000003b1
990
991#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
992
993/* Intel Core-based CPU performance counters */
994#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
995#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
996#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
997#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
998#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
999#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
1000#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
1001
1002/* Geode defined MSRs */
1003#define MSR_GEODE_BUSCONT_CONF0 0x00001900
1004
1005/* Intel VT MSRs */
1006#define MSR_IA32_VMX_BASIC 0x00000480
1007#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
1008#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
1009#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
1010#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
1011#define MSR_IA32_VMX_MISC 0x00000485
1012#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
1013#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
1014#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
1015#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
1016#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
1017#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
1018#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
1019#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
1020#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
1021#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
1022#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
1023#define MSR_IA32_VMX_VMFUNC 0x00000491
1024
1025/* VMX_BASIC bits and bitmasks */
1026#define VMX_BASIC_VMCS_SIZE_SHIFT 32
1027#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
1028#define VMX_BASIC_64 0x0001000000000000LLU
1029#define VMX_BASIC_MEM_TYPE_SHIFT 50
1030#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
1031#define VMX_BASIC_MEM_TYPE_WB 6LLU
1032#define VMX_BASIC_INOUT 0x0040000000000000LLU
1033
1034/* MSR_IA32_VMX_MISC bits */
1035#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
1036#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
1037/* AMD-V MSRs */
1038
1039#define MSR_VM_CR 0xc0010114
1040#define MSR_VM_IGNNE 0xc0010115
1041#define MSR_VM_HSAVE_PA 0xc0010117
1042
1043#endif /* !SELFTEST_KVM_X86_H */
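
A hedged sketch of guest-side code using the helpers above, e.g. enabling CR4.OSFXSR and reading the EFER MSR; this is an assumption about intended usage (guests created by the library run at CPL0, so these accesses are legal), not code from the patch.

#include "x86.h"

static void guest_code(void)
{
	uint64_t efer;

	set_cr4(get_cr4() | X86_CR4_OSFXSR);
	efer = rdmsr(MSR_EFER);

	/* A real test would report efer back to the host, e.g. via port I/O. */
	(void)efer;
}
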
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000000..c9f5b7d4ce38
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,87 @@
1/*
2 * tools/testing/selftests/kvm/lib/assert.c
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 */
8
9#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
10
11#include "test_util.h"
12
13#include <execinfo.h>
14#include <sys/syscall.h>
15
16/* Dumps the current stack trace to stderr. */
17static void __attribute__((noinline)) test_dump_stack(void);
18static void test_dump_stack(void)
19{
20 /*
21 * Build and run this command:
22 *
23 * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
24 * grep -v test_dump_stack | cat -n 1>&2
25 *
26 * Note that the spacing is different and there's no newline.
27 */
28 size_t i;
29 size_t n = 20;
30 void *stack[n];
31 const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
32 const char *pipeline = "|cat -n 1>&2";
33 char cmd[strlen(addr2line) + strlen(pipeline) +
34 /* N bytes per addr * 2 digits per byte + 1 space per addr: */
35 n * (((sizeof(void *)) * 2) + 1) +
36 /* Null terminator: */
37 1];
38 char *c;
39
40 n = backtrace(stack, n);
41 c = &cmd[0];
42 c += sprintf(c, "%s", addr2line);
43 /*
44 * Skip the first 3 frames: backtrace, test_dump_stack, and
45 * test_assert. We hope that backtrace isn't inlined and the other two
46 * we've declared noinline.
47 */
48 for (i = 2; i < n; i++)
49 c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
50 c += sprintf(c, "%s", pipeline);
51#pragma GCC diagnostic push
52#pragma GCC diagnostic ignored "-Wunused-result"
53 system(cmd);
54#pragma GCC diagnostic pop
55}
56
57static pid_t gettid(void)
58{
59 return syscall(SYS_gettid);
60}
61
62void __attribute__((noinline))
63test_assert(bool exp, const char *exp_str,
64 const char *file, unsigned int line, const char *fmt, ...)
65{
66 va_list ap;
67
68 if (!(exp)) {
69 va_start(ap, fmt);
70
71 fprintf(stderr, "==== Test Assertion Failure ====\n"
72 " %s:%u: %s\n"
73 " pid=%d tid=%d\n",
74 file, line, exp_str, getpid(), gettid());
75 test_dump_stack();
76 if (fmt) {
77 fputs(" ", stderr);
78 vfprintf(stderr, fmt, ap);
79 fputs("\n", stderr);
80 }
81 va_end(ap);
82
83 exit(254);
84 }
85
86 return;
87}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000000..7ca1bb40c498
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1480 @@
1/*
2 * tools/testing/selftests/kvm/lib/kvm_util.c
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 */
8
9#include "test_util.h"
10#include "kvm_util.h"
11#include "kvm_util_internal.h"
12
13#include <assert.h>
14#include <sys/mman.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17
18#define KVM_DEV_PATH "/dev/kvm"
19
20#define KVM_UTIL_PGS_PER_HUGEPG 512
21#define KVM_UTIL_MIN_PADDR 0x2000
22
23/* Aligns x up to the next multiple of size. Size must be a power of 2. */
24static void *align(void *x, size_t size)
25{
26 size_t mask = size - 1;
27 TEST_ASSERT(size != 0 && !(size & (size - 1)),
28 "size not a power of 2: %lu", size);
29 return (void *) (((size_t) x + mask) & ~mask);
30}
31
32/* Capability
33 *
34 * Input Args:
35 * cap - Capability
36 *
37 * Output Args: None
38 *
39 * Return:
40 * On success, the Value corresponding to the capability (KVM_CAP_*)
41 * specified by the value of cap. On failure a TEST_ASSERT failure
42 * is produced.
43 *
44 * Looks up and returns the value corresponding to the capability
45 * (KVM_CAP_*) given by cap.
46 */
47int kvm_check_cap(long cap)
48{
49 int ret;
50 int kvm_fd;
51
52 kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
53 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
54 KVM_DEV_PATH, kvm_fd, errno);
55
56 ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
57 TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
58 " rc: %i errno: %i", ret, errno);
59
60 close(kvm_fd);
61
62 return ret;
63}
64
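/*
 * Hedged usage sketch (not part of the original patch): a test can probe
 * for a capability up front and skip itself when it is missing.
 * KVM_CAP_IMMEDIATE_EXIT is only an illustrative example.
 */
static void require_cap_or_skip(long cap)
{
	if (!kvm_check_cap(cap)) {
		fprintf(stderr, "KVM capability %ld not available, skipping test\n", cap);
		exit(0);
	}
}
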
65/* VM Create
66 *
67 * Input Args:
68 * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
69 * phy_pages - Physical memory pages
70 * perm - permission
71 *
72 * Output Args: None
73 *
74 * Return:
75 * Pointer to opaque structure that describes the created VM.
76 *
77 * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
78 * When phy_pages is non-zero, a memory region of phy_pages physical pages
79 * is created and mapped starting at guest physical address 0. The file
80 * descriptor to control the created VM is created with the permissions
81 * given by perm (e.g. O_RDWR).
82 */
83struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
84{
85 struct kvm_vm *vm;
86 int kvm_fd;
87
88 /* Allocate memory. */
89 vm = calloc(1, sizeof(*vm));
 90 TEST_ASSERT(vm != NULL, "Insufficient Memory");
91
92 vm->mode = mode;
93 kvm_fd = open(KVM_DEV_PATH, perm);
94 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
95 KVM_DEV_PATH, kvm_fd, errno);
96
97 /* Create VM. */
98 vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
99 TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
100 "rc: %i errno: %i", vm->fd, errno);
101
102 close(kvm_fd);
103
104 /* Setup mode specific traits. */
105 switch (vm->mode) {
106 case VM_MODE_FLAT48PG:
107 vm->page_size = 0x1000;
108 vm->page_shift = 12;
109
110 /* Limit to 48-bit canonical virtual addresses. */
111 vm->vpages_valid = sparsebit_alloc();
112 sparsebit_set_num(vm->vpages_valid,
113 0, (1ULL << (48 - 1)) >> vm->page_shift);
114 sparsebit_set_num(vm->vpages_valid,
115 (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
116 (1ULL << (48 - 1)) >> vm->page_shift);
117
118 /* Limit physical addresses to 52-bits. */
119 vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
120 break;
121
122 default:
123 TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
124 }
125
126 /* Allocate and setup memory for guest. */
127 vm->vpages_mapped = sparsebit_alloc();
128 if (phy_pages != 0)
129 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
130 0, 0, phy_pages, 0);
131
132 return vm;
133}
134
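/*
 * Hedged usage sketch (not part of the original patch): create a VM with
 * the default amount of guest memory and a read/write control fd, then
 * tear it down again.
 */
static void vm_create_example(void)
{
	struct kvm_vm *vm;

	vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
	kvm_vm_free(vm);
}
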
135/* Userspace Memory Region Find
136 *
137 * Input Args:
138 * vm - Virtual Machine
139 * start - Starting VM physical address
140 * end - Ending VM physical address, inclusive.
141 *
142 * Output Args: None
143 *
144 * Return:
145 * Pointer to overlapping region, NULL if no such region.
146 *
147 * Searches for a region with any physical memory that overlaps with
148 * any portion of the guest physical addresses from start to end
149 * inclusive. If multiple overlapping regions exist, a pointer to any
150 * of the regions is returned. Null is returned only when no overlapping
151 * region exists.
152 */
153static struct userspace_mem_region *userspace_mem_region_find(
154 struct kvm_vm *vm, uint64_t start, uint64_t end)
155{
156 struct userspace_mem_region *region;
157
158 for (region = vm->userspace_mem_region_head; region;
159 region = region->next) {
160 uint64_t existing_start = region->region.guest_phys_addr;
161 uint64_t existing_end = region->region.guest_phys_addr
162 + region->region.memory_size - 1;
163 if (start <= existing_end && end >= existing_start)
164 return region;
165 }
166
167 return NULL;
168}
169
170/* KVM Userspace Memory Region Find
171 *
172 * Input Args:
173 * vm - Virtual Machine
174 * start - Starting VM physical address
175 * end - Ending VM physical address, inclusive.
176 *
177 * Output Args: None
178 *
179 * Return:
180 * Pointer to overlapping region, NULL if no such region.
181 *
182 * Public interface to userspace_mem_region_find. Allows tests to look up
183 * the memslot datastructure for a given range of guest physical memory.
184 */
185struct kvm_userspace_memory_region *
186kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
187 uint64_t end)
188{
189 struct userspace_mem_region *region;
190
191 region = userspace_mem_region_find(vm, start, end);
192 if (!region)
193 return NULL;
194
195 return &region->region;
196}
197
198/* VCPU Find
199 *
200 * Input Args:
201 * vm - Virtual Machine
202 * vcpuid - VCPU ID
203 *
204 * Output Args: None
205 *
206 * Return:
207 * Pointer to VCPU structure
208 *
209 * Locates a vcpu structure that describes the VCPU specified by vcpuid and
210 * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
211 * for the specified vcpuid.
212 */
213struct vcpu *vcpu_find(struct kvm_vm *vm,
214 uint32_t vcpuid)
215{
216 struct vcpu *vcpup;
217
218 for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
219 if (vcpup->id == vcpuid)
220 return vcpup;
221 }
222
223 return NULL;
224}
225
226/* VM VCPU Remove
227 *
228 * Input Args:
229 * vm - Virtual Machine
230 * vcpuid - VCPU ID
231 *
232 * Output Args: None
233 *
234 * Return: None, TEST_ASSERT failures for all error conditions
235 *
236 * Within the VM specified by vm, removes the VCPU given by vcpuid.
237 */
238static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
239{
240 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
241
242 int ret = close(vcpu->fd);
243 TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
244 "errno: %i", ret, errno);
245
246 if (vcpu->next)
247 vcpu->next->prev = vcpu->prev;
248 if (vcpu->prev)
249 vcpu->prev->next = vcpu->next;
250 else
251 vm->vcpu_head = vcpu->next;
252 free(vcpu);
253}
254
255
256/* Destroys and frees the VM pointed to by vmp.
257 */
258void kvm_vm_free(struct kvm_vm *vmp)
259{
260 int ret;
261
262 if (vmp == NULL)
263 return;
264
265 /* Free userspace_mem_regions. */
266 while (vmp->userspace_mem_region_head) {
267 struct userspace_mem_region *region
268 = vmp->userspace_mem_region_head;
269
270 region->region.memory_size = 0;
271 ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
272 &region->region);
273 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
274 "rc: %i errno: %i", ret, errno);
275
276 vmp->userspace_mem_region_head = region->next;
277 sparsebit_free(&region->unused_phy_pages);
278 ret = munmap(region->mmap_start, region->mmap_size);
279 TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
280 ret, errno);
281
282 free(region);
283 }
284
285 /* Free VCPUs. */
286 while (vmp->vcpu_head)
287 vm_vcpu_rm(vmp, vmp->vcpu_head->id);
288
289 /* Free sparsebit arrays. */
290 sparsebit_free(&vmp->vpages_valid);
291 sparsebit_free(&vmp->vpages_mapped);
292
293 /* Close file descriptor for the VM. */
294 ret = close(vmp->fd);
295 TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
296 " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
297
298 /* Free the structure describing the VM. */
299 free(vmp);
300}
301
302/* Memory Compare, host virtual to guest virtual
303 *
304 * Input Args:
305 * hva - Starting host virtual address
306 * vm - Virtual Machine
307 * gva - Starting guest virtual address
308 * len - number of bytes to compare
309 *
310 * Output Args: None
311 *
312 * Input/Output Args: None
313 *
314 * Return:
315 * Returns 0 if the bytes starting at hva for a length of len
316 *   are equal to the guest virtual bytes starting at gva. Returns
317 * a value < 0, if bytes at hva are less than those at gva.
318 * Otherwise a value > 0 is returned.
319 *
320 * Compares the bytes starting at the host virtual address hva, for
321 * a length of len, to the guest bytes starting at the guest virtual
322 * address given by gva.
323 */
324int kvm_memcmp_hva_gva(void *hva,
325 struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
326{
327 size_t amt;
328
329 /* Compare a batch of bytes until either a match is found
330 * or all the bytes have been compared.
331 */
332 for (uintptr_t offset = 0; offset < len; offset += amt) {
333 uintptr_t ptr1 = (uintptr_t)hva + offset;
334
335 /* Determine host address for guest virtual address
336 * at offset.
337 */
338 uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
339
340 /* Determine amount to compare on this pass.
341		 * Don't allow the comparison to cross a page boundary.
342 */
343 amt = len - offset;
344 if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
345 amt = vm->page_size - (ptr1 % vm->page_size);
346 if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
347 amt = vm->page_size - (ptr2 % vm->page_size);
348
349 assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
350 assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
351
352 /* Perform the comparison. If there is a difference
353 * return that result to the caller, otherwise need
354 * to continue on looking for a mismatch.
355 */
356 int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
357 if (ret != 0)
358 return ret;
359 }
360
361 /* No mismatch found. Let the caller know the two memory
362 * areas are equal.
363 */
364 return 0;
365}
366
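/* Illustrative usage (an editor's sketch, not part of this patch): check
 * that a host buffer matches a guest virtual range of the same length.
 * host_buf, guest_va and len are assumed to have been set up by the test.
 *
 *	if (kvm_memcmp_hva_gva(host_buf, vm, guest_va, len) != 0)
 *		TEST_ASSERT(false, "Guest memory differs from host copy");
 */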
367/* Allocate an instance of struct kvm_cpuid2
368 *
369 * Input Args: None
370 *
371 * Output Args: None
372 *
373 * Return: A pointer to the allocated struct. The caller is responsible
374 * for freeing this struct.
375 *
376 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
377 * array to be decided at allocation time, allocation is slightly
378 * complicated. This function uses a reasonable default length for
379 * the array and performs the appropriate allocation.
380 */
381struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
382{
383 struct kvm_cpuid2 *cpuid;
384 int nent = 100;
385 size_t size;
386
387 size = sizeof(*cpuid);
388 size += nent * sizeof(struct kvm_cpuid_entry2);
389 cpuid = malloc(size);
390 if (!cpuid) {
391 perror("malloc");
392 abort();
393 }
394
395 cpuid->nent = nent;
396
397 return cpuid;
398}
399
400/* KVM Supported CPUID Get
401 *
402 * Input Args: None
403 *
404 * Output Args:
405 * cpuid - The supported KVM CPUID
406 *
407 * Return: void
408 *
409 * Get the guest CPUID supported by KVM.
410 */
411void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid)
412{
413 int ret;
414 int kvm_fd;
415
416 kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
417 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
418 KVM_DEV_PATH, kvm_fd, errno);
419
420 ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
421 TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
422 ret, errno);
423
424 close(kvm_fd);
425}
426
427/* Locate a cpuid entry.
428 *
429 * Input Args:
430 * cpuid: The cpuid.
431 *   function, index: The function and index of the cpuid entry to find.
432 *
433 * Output Args: None
434 *
435 * Return: A pointer to the cpuid entry. Never returns NULL.
436 */
437struct kvm_cpuid_entry2 *
438find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
439 uint32_t index)
440{
441 struct kvm_cpuid_entry2 *entry = NULL;
442 int i;
443
444 for (i = 0; i < cpuid->nent; i++) {
445 if (cpuid->entries[i].function == function &&
446 cpuid->entries[i].index == index) {
447 entry = &cpuid->entries[i];
448 break;
449 }
450 }
451
452 TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
453 function, index);
454 return entry;
455}
456
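/* Illustrative usage (an editor's sketch, not part of this patch): query
 * the CPUID leaves KVM supports and look up a single entry. Function 1,
 * index 0 is just an example leaf; the caller owns and frees the buffer.
 *
 *	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2();
 *	struct kvm_cpuid_entry2 *entry;
 *
 *	kvm_get_supported_cpuid(cpuid);
 *	entry = find_cpuid_index_entry(cpuid, 1, 0);
 *	... inspect entry->eax, entry->ebx, entry->ecx, entry->edx ...
 *	free(cpuid);
 */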
457/* VM Userspace Memory Region Add
458 *
459 * Input Args:
460 * vm - Virtual Machine
461 *   src_type - Backing source type for this region
462 *              (e.g. VM_MEM_SRC_ANONYMOUS for anonymous memory).
463 * guest_paddr - Starting guest physical address
464 * slot - KVM region slot
465 * npages - Number of physical pages
466 * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
467 *
468 * Output Args: None
469 *
470 * Return: None
471 *
472 * Allocates a memory area of the number of pages specified by npages
473 * and maps it to the VM specified by vm, at a starting physical address
474 * given by guest_paddr. The region is created with a KVM region slot
475 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
476 * region is created with the flags given by flags.
477 */
478void vm_userspace_mem_region_add(struct kvm_vm *vm,
479 enum vm_mem_backing_src_type src_type,
480 uint64_t guest_paddr, uint32_t slot, uint64_t npages,
481 uint32_t flags)
482{
483 int ret;
484 unsigned long pmem_size = 0;
485 struct userspace_mem_region *region;
486 size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
487
488 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
489 "address not on a page boundary.\n"
490 " guest_paddr: 0x%lx vm->page_size: 0x%x",
491 guest_paddr, vm->page_size);
492 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
493 <= vm->max_gfn, "Physical range beyond maximum "
494 "supported physical address,\n"
495 " guest_paddr: 0x%lx npages: 0x%lx\n"
496 " vm->max_gfn: 0x%lx vm->page_size: 0x%x",
497 guest_paddr, npages, vm->max_gfn, vm->page_size);
498
499 /* Confirm a mem region with an overlapping address doesn't
500 * already exist.
501 */
502 region = (struct userspace_mem_region *) userspace_mem_region_find(
503 vm, guest_paddr, guest_paddr + npages * vm->page_size);
504 if (region != NULL)
505 TEST_ASSERT(false, "overlapping userspace_mem_region already "
506 "exists\n"
507 " requested guest_paddr: 0x%lx npages: 0x%lx "
508 "page_size: 0x%x\n"
509 " existing guest_paddr: 0x%lx size: 0x%lx",
510 guest_paddr, npages, vm->page_size,
511 (uint64_t) region->region.guest_phys_addr,
512 (uint64_t) region->region.memory_size);
513
514 /* Confirm no region with the requested slot already exists. */
515 for (region = vm->userspace_mem_region_head; region;
516 region = region->next) {
517 if (region->region.slot == slot)
518 break;
519 if ((guest_paddr <= (region->region.guest_phys_addr
520 + region->region.memory_size))
521 && ((guest_paddr + npages * vm->page_size)
522 >= region->region.guest_phys_addr))
523 break;
524 }
525 if (region != NULL)
526 TEST_ASSERT(false, "A mem region with the requested slot "
527 "or overlapping physical memory range already exists.\n"
528 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
529 " existing slot: %u paddr: 0x%lx size: 0x%lx",
530 slot, guest_paddr, npages,
531 region->region.slot,
532 (uint64_t) region->region.guest_phys_addr,
533 (uint64_t) region->region.memory_size);
534
535 /* Allocate and initialize new mem region structure. */
536 region = calloc(1, sizeof(*region));
537 TEST_ASSERT(region != NULL, "Insufficient Memory");
538 region->mmap_size = npages * vm->page_size;
539
540 /* Enough memory to align up to a huge page. */
541 if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
542 region->mmap_size += huge_page_size;
543 region->mmap_start = mmap(NULL, region->mmap_size,
544 PROT_READ | PROT_WRITE,
545 MAP_PRIVATE | MAP_ANONYMOUS
546 | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
547 -1, 0);
548 TEST_ASSERT(region->mmap_start != MAP_FAILED,
549		    "mmap failed, mmap_start: %p errno: %i",
550 region->mmap_start, errno);
551
552 /* Align THP allocation up to start of a huge page. */
553 region->host_mem = align(region->mmap_start,
554 src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
555
556 /* As needed perform madvise */
557 if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
558 ret = madvise(region->host_mem, npages * vm->page_size,
559 src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
560 TEST_ASSERT(ret == 0, "madvise failed,\n"
561 " addr: %p\n"
562 " length: 0x%lx\n"
563 " src_type: %x",
564 region->host_mem, npages * vm->page_size, src_type);
565 }
566
567 region->unused_phy_pages = sparsebit_alloc();
568 sparsebit_set_num(region->unused_phy_pages,
569 guest_paddr >> vm->page_shift, npages);
570 region->region.slot = slot;
571 region->region.flags = flags;
572 region->region.guest_phys_addr = guest_paddr;
573 region->region.memory_size = npages * vm->page_size;
574 region->region.userspace_addr = (uintptr_t) region->host_mem;
575 ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
576 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
577 " rc: %i errno: %i\n"
578 " slot: %u flags: 0x%x\n"
579 " guest_phys_addr: 0x%lx size: 0x%lx",
580 ret, errno, slot, flags,
581 guest_paddr, (uint64_t) region->region.memory_size);
582
583 /* Add to linked-list of memory regions. */
584 if (vm->userspace_mem_region_head)
585 vm->userspace_mem_region_head->prev = region;
586 region->next = vm->userspace_mem_region_head;
587 vm->userspace_mem_region_head = region;
588}
589
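/* Illustrative usage (an editor's sketch, not part of this patch): add a
 * second, 16-page anonymous region in slot 1 at guest physical 1 MiB and
 * enable dirty logging on it. The address, slot and size are example
 * values chosen by the test; slot 0 is assumed to be the region created
 * by vm_create().
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *		0x100000, 1, 16, KVM_MEM_LOG_DIRTY_PAGES);
 */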
590/* Memslot to region
591 *
592 * Input Args:
593 * vm - Virtual Machine
594 * memslot - KVM memory slot ID
595 *
596 * Output Args: None
597 *
598 * Return:
599 *   Pointer to the memory region structure that describes the memory region
600 *   using the KVM memory slot ID given by memslot. TEST_ASSERT failure
601 * on error (e.g. currently no memory region using memslot as a KVM
602 * memory slot ID).
603 */
604static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
605 uint32_t memslot)
606{
607 struct userspace_mem_region *region;
608
609 for (region = vm->userspace_mem_region_head; region;
610 region = region->next) {
611 if (region->region.slot == memslot)
612 break;
613 }
614 if (region == NULL) {
615 fprintf(stderr, "No mem region with the requested slot found,\n"
616 " requested slot: %u\n", memslot);
617 fputs("---- vm dump ----\n", stderr);
618 vm_dump(stderr, vm, 2);
619 TEST_ASSERT(false, "Mem region not found");
620 }
621
622 return region;
623}
624
625/* VM Memory Region Flags Set
626 *
627 * Input Args:
628 * vm - Virtual Machine
629 *   slot - Memory slot ID of the region whose flags are to be set
630 *   flags - Flags to set on the region (e.g. KVM_MEM_LOG_DIRTY_PAGES)
631 * Output Args: None
632 *
633 * Return: None
634 *
635 * Sets the flags of the memory region specified by the value of slot,
636 * to the values given by flags.
637 */
638void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
639{
640 int ret;
641 struct userspace_mem_region *region;
642
643 /* Locate memory region. */
644 region = memslot2region(vm, slot);
645
646 region->region.flags = flags;
647
648 ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
649
650 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
651 " rc: %i errno: %i slot: %u flags: 0x%x",
652 ret, errno, slot, flags);
653}
654
655/* VCPU mmap Size
656 *
657 * Input Args: None
658 *
659 * Output Args: None
660 *
661 * Return:
662 * Size of VCPU state
663 *
664 * Returns the size of the structure pointed to by the return value
665 * of vcpu_state().
666 */
667static int vcpu_mmap_sz(void)
668{
669 int dev_fd, ret;
670
671 dev_fd = open(KVM_DEV_PATH, O_RDONLY);
672 TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
673 __func__, KVM_DEV_PATH, dev_fd, errno);
674
675 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
676 TEST_ASSERT(ret >= sizeof(struct kvm_run),
677 "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
678 __func__, ret, errno);
679
680 close(dev_fd);
681
682 return ret;
683}
684
685/* VM VCPU Add
686 *
687 * Input Args:
688 * vm - Virtual Machine
689 * vcpuid - VCPU ID
690 *
691 * Output Args: None
692 *
693 * Return: None
694 *
695 * Creates and adds to the VM specified by vm a virtual CPU with
696 * the ID given by vcpuid.
697 */
698void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
699{
700 struct vcpu *vcpu;
701
702 /* Confirm a vcpu with the specified id doesn't already exist. */
703 vcpu = vcpu_find(vm, vcpuid);
704 if (vcpu != NULL)
705 TEST_ASSERT(false, "vcpu with the specified id "
706 "already exists,\n"
707 " requested vcpuid: %u\n"
708 " existing vcpuid: %u state: %p",
709 vcpuid, vcpu->id, vcpu->state);
710
711 /* Allocate and initialize new vcpu structure. */
712 vcpu = calloc(1, sizeof(*vcpu));
713 TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
714 vcpu->id = vcpuid;
715 vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
716 TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
717 vcpu->fd, errno);
718
719 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
720 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
721 vcpu_mmap_sz(), sizeof(*vcpu->state));
722 vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
723 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
724 TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
725 "vcpu id: %u errno: %i", vcpuid, errno);
726
727 /* Add to linked-list of VCPUs. */
728 if (vm->vcpu_head)
729 vm->vcpu_head->prev = vcpu;
730 vcpu->next = vm->vcpu_head;
731 vm->vcpu_head = vcpu;
732
733 vcpu_setup(vm, vcpuid);
734}
735
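/* Illustrative usage (an editor's sketch, not part of this patch): create
 * VCPU 0 for an existing VM. vcpu_setup() is called internally, so the VM
 * is assumed to already have enough guest memory for page tables; register
 * state and guest code still have to be set up before vcpu_run().
 *
 *	vm_vcpu_add(vm, 0);
 *	... load guest code, set registers/arguments ...
 *	vcpu_run(vm, 0);
 */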
736/* VM Virtual Address Unused Gap
737 *
738 * Input Args:
739 * vm - Virtual Machine
740 * sz - Size (bytes)
741 * vaddr_min - Minimum Virtual Address
742 *
743 * Output Args: None
744 *
745 * Return:
746 *   Lowest virtual address at or above vaddr_min, with at least
747 * sz unused bytes. TEST_ASSERT failure if no area of at least
748 * size sz is available.
749 *
750 * Within the VM specified by vm, locates the lowest starting virtual
751 * address >= vaddr_min, that has at least sz unallocated bytes. A
752 * TEST_ASSERT failure occurs for invalid input or no area of at least
753 * TEST_ASSERT failure occurs for invalid input or if no area of at least
754 * sz unallocated bytes at or above vaddr_min is available.
755static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
756 vm_vaddr_t vaddr_min)
757{
758 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
759
760 /* Determine lowest permitted virtual page index. */
761 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
762 if ((pgidx_start * vm->page_size) < vaddr_min)
763 goto no_va_found;
764
765 /* Loop over section with enough valid virtual page indexes. */
766 if (!sparsebit_is_set_num(vm->vpages_valid,
767 pgidx_start, pages))
768 pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
769 pgidx_start, pages);
770 do {
771 /*
772 * Are there enough unused virtual pages available at
773		 * the currently proposed starting virtual page index?
774 * If not, adjust proposed starting index to next
775 * possible.
776 */
777 if (sparsebit_is_clear_num(vm->vpages_mapped,
778 pgidx_start, pages))
779 goto va_found;
780 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
781 pgidx_start, pages);
782 if (pgidx_start == 0)
783 goto no_va_found;
784
785 /*
786 * If needed, adjust proposed starting virtual address,
787 * to next range of valid virtual addresses.
788 */
789 if (!sparsebit_is_set_num(vm->vpages_valid,
790 pgidx_start, pages)) {
791 pgidx_start = sparsebit_next_set_num(
792 vm->vpages_valid, pgidx_start, pages);
793 if (pgidx_start == 0)
794 goto no_va_found;
795 }
796 } while (pgidx_start != 0);
797
798no_va_found:
799 TEST_ASSERT(false, "No vaddr of specified pages available, "
800 "pages: 0x%lx", pages);
801
802 /* NOT REACHED */
803 return -1;
804
805va_found:
806 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
807 pgidx_start, pages),
808 "Unexpected, invalid virtual page index range,\n"
809 " pgidx_start: 0x%lx\n"
810 " pages: 0x%lx",
811 pgidx_start, pages);
812 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
813 pgidx_start, pages),
814 "Unexpected, pages already mapped,\n"
815 " pgidx_start: 0x%lx\n"
816 " pages: 0x%lx",
817 pgidx_start, pages);
818
819 return pgidx_start * vm->page_size;
820}
821
822/* VM Virtual Address Allocate
823 *
824 * Input Args:
825 * vm - Virtual Machine
826 * sz - Size in bytes
827 * vaddr_min - Minimum starting virtual address
828 * data_memslot - Memory region slot for data pages
829 * pgd_memslot - Memory region slot for new virtual translation tables
830 *
831 * Output Args: None
832 *
833 * Return:
834 * Starting guest virtual address
835 *
836 * Allocates at least sz bytes within the virtual address space of the vm
837 * given by vm. The allocated bytes are mapped to a virtual address >=
838 * the address given by vaddr_min. Note that each allocation uses a
839 * unique set of pages, with the minimum real allocation being at least
840 * a page.
841 */
842vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
843 uint32_t data_memslot, uint32_t pgd_memslot)
844{
845 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
846
847 virt_pgd_alloc(vm, pgd_memslot);
848
849 /* Find an unused range of virtual page addresses of at least
850 * pages in length.
851 */
852 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
853
854 /* Map the virtual pages. */
855 for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
856 pages--, vaddr += vm->page_size) {
857 vm_paddr_t paddr;
858
859 paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
860
861 virt_pg_map(vm, vaddr, paddr, pgd_memslot);
862
863 sparsebit_set(vm->vpages_mapped,
864 vaddr >> vm->page_shift);
865 }
866
867 return vaddr_start;
868}
869
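/* Illustrative usage (an editor's sketch, not part of this patch): allocate
 * one page of guest virtual memory from memslot 0 and clear it through its
 * host mapping. The size 0x1000 assumes the 4 KiB page size of
 * VM_MODE_FLAT48PG, and 0x10000 is an arbitrary minimum virtual address.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, 0x1000, 0x10000, 0, 0);
 *
 *	memset(addr_gva2hva(vm, gva), 0, 0x1000);
 */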
870/* Address VM Physical to Host Virtual
871 *
872 * Input Args:
873 * vm - Virtual Machine
874 * gpa - VM physical address
875 *
876 * Output Args: None
877 *
878 * Return:
879 * Equivalent host virtual address
880 *
881 * Locates the memory region containing the VM physical address given
882 * by gpa, within the VM given by vm. When found, the host virtual
883 * address providing the memory to the vm physical address is returned.
884 * A TEST_ASSERT failure occurs if no region containing gpa exists.
885 */
886void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
887{
888 struct userspace_mem_region *region;
889 for (region = vm->userspace_mem_region_head; region;
890 region = region->next) {
891 if ((gpa >= region->region.guest_phys_addr)
892 && (gpa <= (region->region.guest_phys_addr
893 + region->region.memory_size - 1)))
894 return (void *) ((uintptr_t) region->host_mem
895 + (gpa - region->region.guest_phys_addr));
896 }
897
898 TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
899 return NULL;
900}
901
902/* Address Host Virtual to VM Physical
903 *
904 * Input Args:
905 * vm - Virtual Machine
906 * hva - Host virtual address
907 *
908 * Output Args: None
909 *
910 * Return:
911 * Equivalent VM physical address
912 *
913 * Locates the memory region containing the host virtual address given
914 * by hva, within the VM given by vm. When found, the equivalent
915 * VM physical address is returned. A TEST_ASSERT failure occurs if no
916 * region containing hva exists.
917 */
918vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
919{
920 struct userspace_mem_region *region;
921 for (region = vm->userspace_mem_region_head; region;
922 region = region->next) {
923 if ((hva >= region->host_mem)
924 && (hva <= (region->host_mem
925 + region->region.memory_size - 1)))
926 return (vm_paddr_t) ((uintptr_t)
927 region->region.guest_phys_addr
928 + (hva - (uintptr_t) region->host_mem));
929 }
930
931 TEST_ASSERT(false, "No mapping to a guest physical address, "
932 "hva: %p", hva);
933 return -1;
934}
935
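/* Illustrative usage (an editor's sketch, not part of this patch): the two
 * translations are inverses for any gpa that lies inside a region added
 * earlier, which a test can use as a sanity check.
 *
 *	void *hva = addr_gpa2hva(vm, gpa);
 *
 *	TEST_ASSERT(addr_hva2gpa(vm, hva) == gpa,
 *		"gpa -> hva -> gpa translation not symmetric");
 */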
936/* VM Create IRQ Chip
937 *
938 * Input Args:
939 * vm - Virtual Machine
940 *
941 * Output Args: None
942 *
943 * Return: None
944 *
945 * Creates an interrupt controller chip for the VM specified by vm.
946 */
947void vm_create_irqchip(struct kvm_vm *vm)
948{
949 int ret;
950
951 ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
952 TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
953 "rc: %i errno: %i", ret, errno);
954}
955
956/* VM VCPU State
957 *
958 * Input Args:
959 * vm - Virtual Machine
960 * vcpuid - VCPU ID
961 *
962 * Output Args: None
963 *
964 * Return:
965 * Pointer to structure that describes the state of the VCPU.
966 *
967 * Locates and returns a pointer to a structure that describes the
968 * state of the VCPU with the given vcpuid.
969 */
970struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
971{
972 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
973 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
974
975 return vcpu->state;
976}
977
978/* VM VCPU Run
979 *
980 * Input Args:
981 * vm - Virtual Machine
982 * vcpuid - VCPU ID
983 *
984 * Output Args: None
985 *
986 * Return: None
987 *
988 * Switch to executing the code for the VCPU given by vcpuid, within the VM
989 * given by vm.
990 */
991void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
992{
993 int ret = _vcpu_run(vm, vcpuid);
994 TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
995 "rc: %i errno: %i", ret, errno);
996}
997
998int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
999{
1000 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1001 int rc;
1002
1003 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1004 do {
1005 rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1006 } while (rc == -1 && errno == EINTR);
1007 return rc;
1008}
1009
1010/* VM VCPU Set MP State
1011 *
1012 * Input Args:
1013 * vm - Virtual Machine
1014 * vcpuid - VCPU ID
1015 * mp_state - mp_state to be set
1016 *
1017 * Output Args: None
1018 *
1019 * Return: None
1020 *
1021 * Sets the MP state of the VCPU given by vcpuid, to the state given
1022 * by mp_state.
1023 */
1024void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1025 struct kvm_mp_state *mp_state)
1026{
1027 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1028 int ret;
1029
1030 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1031
1032 ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1033 TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1034 "rc: %i errno: %i", ret, errno);
1035}
1036
1037/* VM VCPU Regs Get
1038 *
1039 * Input Args:
1040 * vm - Virtual Machine
1041 * vcpuid - VCPU ID
1042 *
1043 * Output Args:
1044 * regs - current state of VCPU regs
1045 *
1046 * Return: None
1047 *
1048 * Obtains the current register state for the VCPU specified by vcpuid
1049 * and stores it at the location given by regs.
1050 */
1051void vcpu_regs_get(struct kvm_vm *vm,
1052 uint32_t vcpuid, struct kvm_regs *regs)
1053{
1054 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1055 int ret;
1056
1057 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1058
1059 /* Get the regs. */
1060 ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1061 TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1062 ret, errno);
1063}
1064
1065/* VM VCPU Regs Set
1066 *
1067 * Input Args:
1068 * vm - Virtual Machine
1069 * vcpuid - VCPU ID
1070 * regs - Values to set VCPU regs to
1071 *
1072 * Output Args: None
1073 *
1074 * Return: None
1075 *
1076 * Sets the regs of the VCPU specified by vcpuid to the values
1077 * given by regs.
1078 */
1079void vcpu_regs_set(struct kvm_vm *vm,
1080 uint32_t vcpuid, struct kvm_regs *regs)
1081{
1082 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1083 int ret;
1084
1085 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1086
1087 /* Set the regs. */
1088 ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1089 TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1090 ret, errno);
1091}
1092
1093void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1094 struct kvm_vcpu_events *events)
1095{
1096 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1097 int ret;
1098
1099 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1100
1101	/* Get the events. */
1102 ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1103 TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1104 ret, errno);
1105}
1106
1107void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1108 struct kvm_vcpu_events *events)
1109{
1110 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1111 int ret;
1112
1113 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1114
1115	/* Set the events. */
1116 ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1117 TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1118 ret, errno);
1119}
1120
1121/* VM VCPU Args Set
1122 *
1123 * Input Args:
1124 * vm - Virtual Machine
1125 * vcpuid - VCPU ID
1126 * num - number of arguments
1127 * ... - arguments, each of type uint64_t
1128 *
1129 * Output Args: None
1130 *
1131 * Return: None
1132 *
1133 * Sets the first num function input arguments to the values
1134 * given as variable args. Each of the variable args is expected to
1135 * be of type uint64_t.
1136 */
1137void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
1138{
1139 va_list ap;
1140 struct kvm_regs regs;
1141
1142 TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
1143 " num: %u\n",
1144 num);
1145
1146 va_start(ap, num);
1147 vcpu_regs_get(vm, vcpuid, &regs);
1148
1149 if (num >= 1)
1150 regs.rdi = va_arg(ap, uint64_t);
1151
1152 if (num >= 2)
1153 regs.rsi = va_arg(ap, uint64_t);
1154
1155 if (num >= 3)
1156 regs.rdx = va_arg(ap, uint64_t);
1157
1158 if (num >= 4)
1159 regs.rcx = va_arg(ap, uint64_t);
1160
1161 if (num >= 5)
1162 regs.r8 = va_arg(ap, uint64_t);
1163
1164 if (num >= 6)
1165 regs.r9 = va_arg(ap, uint64_t);
1166
1167 vcpu_regs_set(vm, vcpuid, &regs);
1168 va_end(ap);
1169}
1170
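/* Illustrative usage (an editor's sketch, not part of this patch): pass two
 * uint64_t arguments to the guest entry point of VCPU 0 by loading RDI and
 * RSI. The argument values are arbitrary examples.
 *
 *	vcpu_args_set(vm, 0, 2, (uint64_t) 0x1234, (uint64_t) 0x5678);
 */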
1171/* VM VCPU System Regs Get
1172 *
1173 * Input Args:
1174 * vm - Virtual Machine
1175 * vcpuid - VCPU ID
1176 *
1177 * Output Args:
1178 * sregs - current state of VCPU system regs
1179 *
1180 * Return: None
1181 *
1182 * Obtains the current system register state for the VCPU specified by
1183 * vcpuid and stores it at the location given by sregs.
1184 */
1185void vcpu_sregs_get(struct kvm_vm *vm,
1186 uint32_t vcpuid, struct kvm_sregs *sregs)
1187{
1188 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1189 int ret;
1190
1191 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1192
1193	/* Get the sregs. */
1195 ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1196 TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1197 ret, errno);
1198}
1199
1200/* VM VCPU System Regs Set
1201 *
1202 * Input Args:
1203 * vm - Virtual Machine
1204 * vcpuid - VCPU ID
1205 * sregs - Values to set VCPU system regs to
1206 *
1207 * Output Args: None
1208 *
1209 * Return: None
1210 *
1211 * Sets the system regs of the VCPU specified by vcpuid to the values
1212 * given by sregs.
1213 */
1214void vcpu_sregs_set(struct kvm_vm *vm,
1215 uint32_t vcpuid, struct kvm_sregs *sregs)
1216{
1217 int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
1218	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
1219 "rc: %i errno: %i", ret, errno);
1220}
1221
1222int _vcpu_sregs_set(struct kvm_vm *vm,
1223 uint32_t vcpuid, struct kvm_sregs *sregs)
1224{
1225 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1226 int ret;
1227
1228 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1229
1230	/* Set the sregs. */
1231 return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1232}
1233
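/* Illustrative usage (an editor's sketch, not part of this patch): a
 * get/modify/set cycle on the system registers. _vcpu_sregs_set() is the
 * variant to use when the test wants to observe the ioctl result itself,
 * e.g. for a value it assumes KVM will reject, instead of asserting on it.
 *
 *	struct kvm_sregs sregs;
 *
 *	vcpu_sregs_get(vm, 0, &sregs);
 *	sregs.apic_base = <some value the test expects KVM to refuse>;
 *	TEST_ASSERT(_vcpu_sregs_set(vm, 0, &sregs) != 0,
 *		"KVM accepted an apic_base it was expected to reject");
 */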
1234/* VCPU Ioctl
1235 *
1236 * Input Args:
1237 * vm - Virtual Machine
1238 * vcpuid - VCPU ID
1239 * cmd - Ioctl number
1240 * arg - Argument to pass to the ioctl
1241 *
1242 * Return: None
1243 *
1244 * Issues an arbitrary ioctl on a VCPU fd.
1245 */
1246void vcpu_ioctl(struct kvm_vm *vm,
1247 uint32_t vcpuid, unsigned long cmd, void *arg)
1248{
1249 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1250 int ret;
1251
1252 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1253
1254 ret = ioctl(vcpu->fd, cmd, arg);
1255 TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
1256 cmd, ret, errno, strerror(errno));
1257}
1258
1259/* VM Ioctl
1260 *
1261 * Input Args:
1262 * vm - Virtual Machine
1263 * cmd - Ioctl number
1264 * arg - Argument to pass to the ioctl
1265 *
1266 * Return: None
1267 *
1268 * Issues an arbitrary ioctl on a VM fd.
1269 */
1270void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1271{
1272 int ret;
1273
1274 ret = ioctl(vm->fd, cmd, arg);
1275 TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
1276 cmd, ret, errno, strerror(errno));
1277}
1278
1279/* VM Dump
1280 *
1281 * Input Args:
1282 * vm - Virtual Machine
1283 * indent - Left margin indent amount
1284 *
1285 * Output Args:
1286 * stream - Output FILE stream
1287 *
1288 * Return: None
1289 *
1290 * Dumps the current state of the VM given by vm, to the FILE stream
1291 * given by stream.
1292 */
1293void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
1294{
1295 struct userspace_mem_region *region;
1296 struct vcpu *vcpu;
1297
1298 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
1299 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
1300 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
1301 fprintf(stream, "%*sMem Regions:\n", indent, "");
1302 for (region = vm->userspace_mem_region_head; region;
1303 region = region->next) {
1304 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
1305 "host_virt: %p\n", indent + 2, "",
1306 (uint64_t) region->region.guest_phys_addr,
1307 (uint64_t) region->region.memory_size,
1308 region->host_mem);
1309 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
1310 sparsebit_dump(stream, region->unused_phy_pages, 0);
1311 }
1312 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
1313 sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
1314 fprintf(stream, "%*spgd_created: %u\n", indent, "",
1315 vm->pgd_created);
1316 if (vm->pgd_created) {
1317 fprintf(stream, "%*sVirtual Translation Tables:\n",
1318 indent + 2, "");
1319 virt_dump(stream, vm, indent + 4);
1320 }
1321 fprintf(stream, "%*sVCPUs:\n", indent, "");
1322 for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
1323 vcpu_dump(stream, vm, vcpu->id, indent + 2);
1324}
1325
1326/* VM VCPU Dump
1327 *
1328 * Input Args:
1329 * vm - Virtual Machine
1330 * vcpuid - VCPU ID
1331 * indent - Left margin indent amount
1332 *
1333 * Output Args:
1334 * stream - Output FILE stream
1335 *
1336 * Return: None
1337 *
1338 * Dumps the current state of the VCPU specified by vcpuid, within the VM
1339 * given by vm, to the FILE stream given by stream.
1340 */
1341void vcpu_dump(FILE *stream, struct kvm_vm *vm,
1342 uint32_t vcpuid, uint8_t indent)
1343{
1344 struct kvm_regs regs;
1345 struct kvm_sregs sregs;
1346
1347 fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
1348
1349 fprintf(stream, "%*sregs:\n", indent + 2, "");
1350 vcpu_regs_get(vm, vcpuid, &regs);
1351 regs_dump(stream, &regs, indent + 4);
1352
1353 fprintf(stream, "%*ssregs:\n", indent + 2, "");
1354 vcpu_sregs_get(vm, vcpuid, &sregs);
1355 sregs_dump(stream, &sregs, indent + 4);
1356}
1357
1358/* Known KVM exit reasons */
1359static struct exit_reason {
1360 unsigned int reason;
1361 const char *name;
1362} exit_reasons_known[] = {
1363 {KVM_EXIT_UNKNOWN, "UNKNOWN"},
1364 {KVM_EXIT_EXCEPTION, "EXCEPTION"},
1365 {KVM_EXIT_IO, "IO"},
1366 {KVM_EXIT_HYPERCALL, "HYPERCALL"},
1367 {KVM_EXIT_DEBUG, "DEBUG"},
1368 {KVM_EXIT_HLT, "HLT"},
1369 {KVM_EXIT_MMIO, "MMIO"},
1370 {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
1371 {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
1372 {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
1373 {KVM_EXIT_INTR, "INTR"},
1374 {KVM_EXIT_SET_TPR, "SET_TPR"},
1375 {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
1376 {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
1377 {KVM_EXIT_S390_RESET, "S390_RESET"},
1378 {KVM_EXIT_DCR, "DCR"},
1379 {KVM_EXIT_NMI, "NMI"},
1380 {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
1381 {KVM_EXIT_OSI, "OSI"},
1382 {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
1383#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
1384 {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
1385#endif
1386};
1387
1388/* Exit Reason String
1389 *
1390 * Input Args:
1391 * exit_reason - Exit reason
1392 *
1393 * Output Args: None
1394 *
1395 * Return:
1396 * Constant string pointer describing the exit reason.
1397 *
1398 * Locates and returns a constant string that describes the KVM exit
1399 * reason given by exit_reason. If no such string is found, a constant
1400 * string of "Unknown" is returned.
1401 */
1402const char *exit_reason_str(unsigned int exit_reason)
1403{
1404 unsigned int n1;
1405
1406 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
1407 if (exit_reason == exit_reasons_known[n1].reason)
1408 return exit_reasons_known[n1].name;
1409 }
1410
1411 return "Unknown";
1412}
1413
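/* Illustrative usage (an editor's sketch, not part of this patch): run
 * VCPU 0 and report an unexpected exit with a human-readable reason. The
 * expectation of KVM_EXIT_IO is just an example.
 *
 *	struct kvm_run *run = vcpu_state(vm, 0);
 *
 *	vcpu_run(vm, 0);
 *	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 *		"Unexpected exit reason: %u (%s)",
 *		run->exit_reason, exit_reason_str(run->exit_reason));
 */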
1414/* Physical Page Allocate
1415 *
1416 * Input Args:
1417 * vm - Virtual Machine
1418 * paddr_min - Physical address minimum
1419 * memslot - Memory region to allocate page from
1420 *
1421 * Output Args: None
1422 *
1423 * Return:
1424 * Starting physical address
1425 *
1426 * Within the VM specified by vm, locates an available physical page
1427 * at or above paddr_min. If found, the page is marked as in use
1428 * and its address is returned. A TEST_ASSERT failure occurs if no
1429 * page is available at or above paddr_min.
1430 */
1431vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
1432 vm_paddr_t paddr_min, uint32_t memslot)
1433{
1434 struct userspace_mem_region *region;
1435 sparsebit_idx_t pg;
1436
1437 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
1438		"not divisible by page size.\n"
1439 " paddr_min: 0x%lx page_size: 0x%x",
1440 paddr_min, vm->page_size);
1441
1442 /* Locate memory region. */
1443 region = memslot2region(vm, memslot);
1444
1445 /* Locate next available physical page at or above paddr_min. */
1446 pg = paddr_min >> vm->page_shift;
1447
1448 if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
1449 pg = sparsebit_next_set(region->unused_phy_pages, pg);
1450 if (pg == 0) {
1451 fprintf(stderr, "No guest physical page available, "
1452			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
1453 paddr_min, vm->page_size, memslot);
1454 fputs("---- vm dump ----\n", stderr);
1455 vm_dump(stderr, vm, 2);
1456 abort();
1457 }
1458 }
1459
1460 /* Specify page as in use and return its address. */
1461 sparsebit_clear(region->unused_phy_pages, pg);
1462
1463 return pg * vm->page_size;
1464}
1465
1466/* Address Guest Virtual to Host Virtual
1467 *
1468 * Input Args:
1469 * vm - Virtual Machine
1470 * gva - VM virtual address
1471 *
1472 * Output Args: None
1473 *
1474 * Return:
1475 * Equivalent host virtual address
1476 */
1477void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
1478{
1479 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
1480}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000000..a0bd1980c81c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,67 @@
1/*
2 * tools/testing/selftests/kvm/lib/kvm_util_internal.h
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 */
8
9#ifndef KVM_UTIL_INTERNAL_H
10#define KVM_UTIL_INTERNAL_H 1
11
12#include "sparsebit.h"
13
14#ifndef BITS_PER_BYTE
15#define BITS_PER_BYTE 8
16#endif
17
18#ifndef BITS_PER_LONG
19#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
20#endif
21
22#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
23#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
24
25/* Concrete definition of struct kvm_vm. */
26struct userspace_mem_region {
27 struct userspace_mem_region *next, *prev;
28 struct kvm_userspace_memory_region region;
29 struct sparsebit *unused_phy_pages;
30 int fd;
31 off_t offset;
32 void *host_mem;
33 void *mmap_start;
34 size_t mmap_size;
35};
36
37struct vcpu {
38 struct vcpu *next, *prev;
39 uint32_t id;
40 int fd;
41 struct kvm_run *state;
42};
43
44struct kvm_vm {
45 int mode;
46 int fd;
47 unsigned int page_size;
48 unsigned int page_shift;
49 uint64_t max_gfn;
50 struct vcpu *vcpu_head;
51 struct userspace_mem_region *userspace_mem_region_head;
52 struct sparsebit *vpages_valid;
53 struct sparsebit *vpages_mapped;
54 bool pgd_created;
55 vm_paddr_t pgd;
56};
57
58struct vcpu *vcpu_find(struct kvm_vm *vm,
59 uint32_t vcpuid);
60void vcpu_setup(struct kvm_vm *vm, int vcpuid);
61void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
62void regs_dump(FILE *stream, struct kvm_regs *regs,
63 uint8_t indent);
64void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
65 uint8_t indent);
66
67#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000000..0c5cf3e0cb6f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
1/*
2 * Sparse bit array
3 *
4 * Copyright (C) 2018, Google LLC.
5 * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2.
8 *
9 * This library provides functions to support a memory efficient bit array,
10 * with an index size of 2^64. A sparsebit array is allocated through
11 * the use sparsebit_alloc() and free'd via sparsebit_free(),
12 * such as in the following:
13 *
14 * struct sparsebit *s;
15 * s = sparsebit_alloc();
16 * sparsebit_free(&s);
17 *
18 * All operations work on a struct sparsebit pointer.
19 * Note that sparsebit_free() takes a pointer to the caller's
20 * struct sparsebit pointer. This is so that sparsebit_free() is able
21 * to poison that pointer (e.g. set it to NULL) before
22 * returning to the caller.
23 *
24 * Between the return of sparsebit_alloc() and the call of
25 * sparsebit_free(), there are multiple query and modifying operations
26 * that can be performed on the allocated sparsebit array. All of
27 * these operations take as a parameter the value returned from
28 * sparsebit_alloc() and most also take a bit index. Frequently
29 * used routines include:
30 *
31 * ---- Query Operations
32 * sparsebit_is_set(s, idx)
33 * sparsebit_is_clear(s, idx)
34 * sparsebit_any_set(s)
35 * sparsebit_first_set(s)
36 * sparsebit_next_set(s, prev_idx)
37 *
38 * ---- Modifying Operations
39 * sparsebit_set(s, idx)
40 * sparsebit_clear(s, idx)
41 * sparsebit_set_num(s, idx, num);
42 * sparsebit_clear_num(s, idx, num);
43 *
44 * A common operation is to iterate over all the bits set in a test's
45 * sparsebit array. This can be done via code with the following structure:
46 *
47 * sparsebit_idx_t idx;
48 * if (sparsebit_any_set(s)) {
49 * idx = sparsebit_first_set(s);
50 * do {
51 * ...
52 * idx = sparsebit_next_set(s, idx);
53 * } while (idx != 0);
54 * }
55 *
56 * The index of the first bit set needs to be obtained via
57 * sparsebit_first_set(), because sparsebit_next_set() needs
58 * the index of the previously set bit. The sparsebit_idx_t type is
59 * unsigned, so there is no previous index before 0 that is available.
60 * Also, the call to sparsebit_first_set() is not made unless there
61 * is at least 1 bit in the array set. This is because sparsebit_first_set()
62 * aborts if it is called with no bits set.
63 * It is the caller's responsibility to ensure that the
64 * sparsebit array has at least a single bit set before calling
65 * sparsebit_first_set().
66 *
67 * ==== Implementation Overview ====
68 * For the most part the internal implementation of sparsebit is
69 * opaque to the caller. One important implementation detail that the
70 * caller may need to be aware of is the spatial complexity of the
71 * implementation. This implementation of a sparsebit array is not
72 * only sparse, in that it uses memory proportional to the number of bits
73 * set. It is also efficient in memory usage when most of the bits are
74 * set.
75 *
76 * At a high-level the state of the bit settings are maintained through
77 * the use of a binary-search tree, where each node contains at least
78 * the following members:
79 *
80 * typedef uint64_t sparsebit_idx_t;
81 * typedef uint64_t sparsebit_num_t;
82 *
83 * sparsebit_idx_t idx;
84 * uint32_t mask;
85 * sparsebit_num_t num_after;
86 *
87 * The idx member contains the bit index of the first bit described by this
88 * node, while the mask member stores the setting of the first 32-bits.
89 * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
90 * mask member at 1 << n.
91 *
92 * Nodes are sorted by idx and the bits described by two nodes will never
93 * overlap. The idx member is always aligned to the mask size, i.e. a
94 * multiple of 32.
95 *
96 * Beyond a typical implementation, the nodes in this implementation also
97 * contains a member named num_after. The num_after member holds the
98 * number of bits immediately after the mask bits that are contiguously set.
99 * The use of the num_after member allows this implementation to efficiently
100 * represent cases where most bits are set. For example, the case of all
101 * but the last two bits set is represented by the following two nodes:
102 *
103 * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
104 * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
105 *
106 * ==== Invariants ====
107 * This implementation uses the following invariants:
108 *
109 * + Nodes are only used to represent bits that are set.
110 * Nodes with a mask of 0 and num_after of 0 are not allowed.
111 *
112 * + Sum of bits set in all the nodes is equal to the value of
113 * the struct sparsebit_pvt num_set member.
114 *
115 * + The setting of at least one bit is always described in a node's
116 * mask (mask >= 1).
117 *
118 * + A node with all mask bits set only occurs when the last bit
119 *   described by the previous node is not equal to this node's
120 *   starting index - 1. All such occurrences of this condition are
121 *   avoided by moving the setting of the node's mask bits into
122 *   the previous node's num_after setting.
123 *
124 * + A node's starting index is evenly divisible by the number of bits
125 *   within a node's mask member.
126 *
127 * + Nodes never represent a range of bits that wrap around the
128 * highest supported index.
129 *
130 *       (idx + MASK_BITS + num_after - 1) <= (((sparsebit_idx_t) 0) - 1)
131 *
132 * As a consequence of the above, the num_after member of a node
133 * will always be <=:
134 *
135 * maximum_index - nodes_starting_index - number_of_mask_bits
136 *
137 * + Nodes within the binary search tree are sorted based on each
138 *   node's starting index.
139 *
140 * + The range of bits described by any two nodes do not overlap. The
141 * range of bits described by a single node is:
142 *
143 * start: node->idx
144 * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
145 *
146 * Note, at times these invariants are temporarily violated for a
147 * specific portion of the code. For example, when setting a mask
148 * bit, there is a small delay between when the mask bit is set and the
149 * value in the struct sparsebit_pvt num_set member is updated. Other
150 * temporary violations occur when node_split() is called with a specified
151 * index and assures that a node where its mask represents the bit
152 * at the specified index exists. At times to do this node_split()
153 * must split an existing node into two nodes or create a node that
154 * has no bits set. Such temporary violations must be corrected before
155 * returning to the caller. These corrections are typically performed
156 * by the local function node_reduce().
157 */
158
159#include "test_util.h"
160#include "sparsebit.h"
161#include <limits.h>
162#include <assert.h>
163
164#define DUMP_LINE_MAX 100 /* Does not include indent amount */
165
166typedef uint32_t mask_t;
167#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
168
169struct node {
170 struct node *parent;
171 struct node *left;
172 struct node *right;
173 sparsebit_idx_t idx; /* index of least-significant bit in mask */
174 sparsebit_num_t num_after; /* num contiguously set after mask */
175 mask_t mask;
176};
177
178struct sparsebit {
179 /*
180 * Points to root node of the binary search
181 * tree. Equal to NULL when no bits are set in
182 * the entire sparsebit array.
183 */
184 struct node *root;
185
186 /*
187 * A redundant count of the total number of bits set. Used for
188 * diagnostic purposes and to change the time complexity of
189 * sparsebit_num_set() from O(n) to O(1).
190 * Note: Due to overflow, a value of 0 means none or all set.
191 */
192 sparsebit_num_t num_set;
193};
194
195/* Returns the number of set bits described by the settings
196 * of the node pointed to by nodep.
197 */
198static sparsebit_num_t node_num_set(struct node *nodep)
199{
200 return nodep->num_after + __builtin_popcount(nodep->mask);
201}
202
203/* Returns a pointer to the node that describes the
204 * lowest bit index.
205 */
206static struct node *node_first(struct sparsebit *s)
207{
208 struct node *nodep;
209
210 for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
211 ;
212
213 return nodep;
214}
215
216/* Returns a pointer to the node that describes the
217 * lowest bit index > the index of the node pointed to by np.
218 * Returns NULL if no node with a higher index exists.
219 */
220static struct node *node_next(struct sparsebit *s, struct node *np)
221{
222 struct node *nodep = np;
223
224 /*
225 * If current node has a right child, next node is the left-most
226 * of the right child.
227 */
228 if (nodep->right) {
229 for (nodep = nodep->right; nodep->left; nodep = nodep->left)
230 ;
231 return nodep;
232 }
233
234 /*
235 * No right child. Go up until node is left child of a parent.
236 * That parent is then the next node.
237 */
238 while (nodep->parent && nodep == nodep->parent->right)
239 nodep = nodep->parent;
240
241 return nodep->parent;
242}
243
244/* Searches for and returns a pointer to the node that describes the
245 * highest index < the index of the node pointed to by np.
246 * Returns NULL if no node with a lower index exists.
247 */
248static struct node *node_prev(struct sparsebit *s, struct node *np)
249{
250 struct node *nodep = np;
251
252 /*
253 * If current node has a left child, next node is the right-most
254 * of the left child.
255 */
256 if (nodep->left) {
257 for (nodep = nodep->left; nodep->right; nodep = nodep->right)
258 ;
259 return (struct node *) nodep;
260 }
261
262 /*
263 * No left child. Go up until node is right child of a parent.
264 * That parent is then the next node.
265 */
266 while (nodep->parent && nodep == nodep->parent->left)
267 nodep = nodep->parent;
268
269 return (struct node *) nodep->parent;
270}
271
272
273/* Allocates space to hold a copy of the node sub-tree pointed to by
274 * subtree and duplicates the bit settings to the newly allocated nodes.
275 * Returns the newly allocated copy of subtree.
276 */
277static struct node *node_copy_subtree(struct node *subtree)
278{
279 struct node *root;
280
281 /* Duplicate the node at the root of the subtree */
282 root = calloc(1, sizeof(*root));
283 if (!root) {
284 perror("calloc");
285 abort();
286 }
287
288 root->idx = subtree->idx;
289 root->mask = subtree->mask;
290 root->num_after = subtree->num_after;
291
292 /* As needed, recursively duplicate the left and right subtrees */
293 if (subtree->left) {
294 root->left = node_copy_subtree(subtree->left);
295 root->left->parent = root;
296 }
297
298 if (subtree->right) {
299 root->right = node_copy_subtree(subtree->right);
300 root->right->parent = root;
301 }
302
303 return root;
304}
305
306/* Searches for and returns a pointer to the node that describes the setting
307 * of the bit given by idx. A node describes the setting of a bit if its
308 * index is within the bits described by the mask bits or the number of
309 * contiguous bits set after the mask. Returns NULL if there is no such node.
310 */
311static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
312{
313 struct node *nodep;
314
315 /* Find the node that describes the setting of the bit at idx */
316 for (nodep = s->root; nodep;
317 nodep = nodep->idx > idx ? nodep->left : nodep->right) {
318 if (idx >= nodep->idx &&
319 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
320 break;
321 }
322
323 return nodep;
324}
325
326/* Entry Requirements:
327 * + A node that describes the setting of idx is not already present.
328 *
329 * Adds a new node to describe the setting of the bit at the index given
330 * by idx. Returns a pointer to the newly added node.
331 *
332 * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
333 */
334static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
335{
336 struct node *nodep, *parentp, *prev;
337
338 /* Allocate and initialize the new node. */
339 nodep = calloc(1, sizeof(*nodep));
340 if (!nodep) {
341 perror("calloc");
342 abort();
343 }
344
345 nodep->idx = idx & -MASK_BITS;
346
347 /* If no nodes, set it up as the root node. */
348 if (!s->root) {
349 s->root = nodep;
350 return nodep;
351 }
352
353 /*
354 * Find the parent where the new node should be attached
355 * and add the node there.
356 */
357 parentp = s->root;
358 while (true) {
359 if (idx < parentp->idx) {
360 if (!parentp->left) {
361 parentp->left = nodep;
362 nodep->parent = parentp;
363 break;
364 }
365 parentp = parentp->left;
366 } else {
367 assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
368 if (!parentp->right) {
369 parentp->right = nodep;
370 nodep->parent = parentp;
371 break;
372 }
373 parentp = parentp->right;
374 }
375 }
376
377 /*
378 * Does num_after bits of previous node overlap with the mask
379 * of the new node? If so set the bits in the new nodes mask
380 * and reduce the previous nodes num_after.
381 */
382 prev = node_prev(s, nodep);
383 while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
384 unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
385 - nodep->idx;
386 assert(prev->num_after > 0);
387 assert(n1 < MASK_BITS);
388 assert(!(nodep->mask & (1 << n1)));
389 nodep->mask |= (1 << n1);
390 prev->num_after--;
391 }
392
393 return nodep;
394}
395
396/* Returns whether all the bits in the sparsebit array are set. */
397bool sparsebit_all_set(struct sparsebit *s)
398{
399 /*
400	 * If any nodes exist, there must be at least one bit set. The only
401	 * case where a bit is set and the total num set is 0 is when all bits
402 * are set.
403 */
404 return s->root && s->num_set == 0;
405}
406
407/* Clears all bits described by the node pointed to by nodep, then
408 * removes the node.
409 */
410static void node_rm(struct sparsebit *s, struct node *nodep)
411{
412 struct node *tmp;
413 sparsebit_num_t num_set;
414
415 num_set = node_num_set(nodep);
416 assert(s->num_set >= num_set || sparsebit_all_set(s));
417 s->num_set -= node_num_set(nodep);
418
419 /* Have both left and right child */
420 if (nodep->left && nodep->right) {
421 /*
422 * Move left children to the leftmost leaf node
423 * of the right child.
424 */
425 for (tmp = nodep->right; tmp->left; tmp = tmp->left)
426 ;
427 tmp->left = nodep->left;
428 nodep->left = NULL;
429 tmp->left->parent = tmp;
430 }
431
432 /* Left only child */
433 if (nodep->left) {
434 if (!nodep->parent) {
435 s->root = nodep->left;
436 nodep->left->parent = NULL;
437 } else {
438 nodep->left->parent = nodep->parent;
439 if (nodep == nodep->parent->left)
440 nodep->parent->left = nodep->left;
441 else {
442 assert(nodep == nodep->parent->right);
443 nodep->parent->right = nodep->left;
444 }
445 }
446
447 nodep->parent = nodep->left = nodep->right = NULL;
448 free(nodep);
449
450 return;
451 }
452
453
454 /* Right only child */
455 if (nodep->right) {
456 if (!nodep->parent) {
457 s->root = nodep->right;
458 nodep->right->parent = NULL;
459 } else {
460 nodep->right->parent = nodep->parent;
461 if (nodep == nodep->parent->left)
462 nodep->parent->left = nodep->right;
463 else {
464 assert(nodep == nodep->parent->right);
465 nodep->parent->right = nodep->right;
466 }
467 }
468
469 nodep->parent = nodep->left = nodep->right = NULL;
470 free(nodep);
471
472 return;
473 }
474
475 /* Leaf Node */
476 if (!nodep->parent) {
477 s->root = NULL;
478 } else {
479 if (nodep->parent->left == nodep)
480 nodep->parent->left = NULL;
481 else {
482 assert(nodep == nodep->parent->right);
483 nodep->parent->right = NULL;
484 }
485 }
486
487 nodep->parent = nodep->left = nodep->right = NULL;
488 free(nodep);
489
490 return;
491}
492
493/* Splits the node containing the bit at idx so that there is a node
494 * that starts at the specified index. If no such node exists, a new
495 * node at the specified index is created. Returns the new node.
496 *
497 * idx must be on a mask boundary, i.e. a multiple of MASK_BITS.
498 */
499static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
500{
501 struct node *nodep1, *nodep2;
502 sparsebit_idx_t offset;
503 sparsebit_num_t orig_num_after;
504
505 assert(!(idx % MASK_BITS));
506
507 /*
508 * Is there a node that describes the setting of idx?
509 * If not, add it.
510 */
511 nodep1 = node_find(s, idx);
512 if (!nodep1)
513 return node_add(s, idx);
514
515 /*
516 * All done if the starting index of the node is where the
517 * split should occur.
518 */
519 if (nodep1->idx == idx)
520 return nodep1;
521
522 /*
523 * Split point not at start of mask, so it must be part of
524 * bits described by num_after.
525 */
526
527 /*
528 * Calculate offset within num_after for where the split is
529 * to occur.
530 */
531 offset = idx - (nodep1->idx + MASK_BITS);
532 orig_num_after = nodep1->num_after;
533
534 /*
535 * Add a new node to describe the bits starting at
536 * the split point.
537 */
538 nodep1->num_after = offset;
539 nodep2 = node_add(s, idx);
540
541 /* Move bits after the split point into the new node */
542 nodep2->num_after = orig_num_after - offset;
543 if (nodep2->num_after >= MASK_BITS) {
544 nodep2->mask = ~(mask_t) 0;
545 nodep2->num_after -= MASK_BITS;
546 } else {
547 nodep2->mask = (1 << nodep2->num_after) - 1;
548 nodep2->num_after = 0;
549 }
550
551 return nodep2;
552}
553
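/*
 * Editorial sketch (not part of the original patch), illustrating
 * node_split() under the assumption MASK_BITS == 32: splitting the
 * single node
 *
 *	{ idx = 0, mask = ~0, num_after = 96 }
 *
 * at idx = 64 shrinks it to { idx = 0, mask = ~0, num_after = 32 }
 * and returns a new node { idx = 64, mask = ~0, num_after = 32 }, so
 * the bits from index 64 upward can be manipulated independently.
 */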
554/* Iteratively reduces the node pointed to by nodep and its adjacent
555 * nodes into a more compact form. For example, a node whose mask has
556 * all bits set and that is adjacent to a previous node will get combined
557 * into a single node with an increased num_after setting.
558 *
559 * After each reduction, a further check is made to see if additional
560 * reductions are possible with the new previous and next nodes. Note,
561 * a search for a reduction is only done across the nodes nearest nodep
562 * and those that became part of a reduction. Reductions beyond nodep
563 * and the adjacent nodes that are reduced are not discovered. It is the
564 * responsibility of the caller to pass a nodep that is within one node
565 * of each possible reduction.
566 *
567 * This function does not fix every temporary invariant violation.
568 * For example, it does not fix the case where the bit settings described
569 * by two or more nodes overlap. Such a violation introduces the potential
570 * complication of a bit setting for a specific index having different settings
571 * in different nodes. This would then introduce the further complication
572 * of which node has the correct setting of the bit and thus such conditions
573 * are not allowed.
574 *
575 * This function is designed to fix invariant violations that are introduced
576 * by node_split() and by changes to a node's mask or num_after members.
577 * For example, when setting a bit within a node's mask, the function that
578 * sets the bit doesn't have to worry about whether the setting of that
579 * bit caused the mask to have leading only or trailing only bits set.
580 * Instead, the function can call node_reduce(), with nodep equal to the
581 * node address that it set a mask bit in, and node_reduce() will notice
582 * the cases of leading or trailing only bits and that there is an
583 * adjacent node that the bit settings could be merged into.
584 *
585 * This implementation specifically detects and corrects violation of the
586 * following invariants:
587 *
588 * + Nodes are only used to represent bits that are set.
589 *   Nodes with a mask of 0 and num_after of 0 are not allowed.
590 *
591 * + The setting of at least one bit is always described in a node's
592 *   mask (mask >= 1).
593 *
594 * + A node with all mask bits set only occurs when the last bit
595 *   described by the previous node is not equal to this node's
596 *   starting index - 1. All such occurrences of this condition are
597 *   avoided by moving the setting of the node's mask bits into
598 *   the previous node's num_after setting.
599 */
600static void node_reduce(struct sparsebit *s, struct node *nodep)
601{
602 bool reduction_performed;
603
604 do {
605 reduction_performed = false;
606 struct node *prev, *next, *tmp;
607
608 /* 1) Potential reductions within the current node. */
609
610 /* Nodes with all bits cleared may be removed. */
611 if (nodep->mask == 0 && nodep->num_after == 0) {
612 /*
613 * About to remove the node pointed to by
614 * nodep, which normally would cause a problem
615 * for the next pass through the reduction loop,
616 * because the node at the starting point no longer
617 * exists. This potential problem is handled
618 * by first remembering the location of the next
619 * or previous nodes. Doesn't matter which, because
620 * once the node at nodep is removed, there will be
621 * no other nodes between prev and next.
622 *
623			 * Note, the checks performed on nodep against both
624			 * prev and next look for an adjacent node that can
625			 * be reduced into a single node. As such, after
626			 * removing the node at nodep, it doesn't matter
627			 * whether the nodep for the next pass through the
628			 * loop is equal to the previous pass's prev or next
629			 * node. Either way, on the next pass
630 * the one not selected will become either the
631 * prev or next node.
632 */
633 tmp = node_next(s, nodep);
634 if (!tmp)
635 tmp = node_prev(s, nodep);
636
637 node_rm(s, nodep);
638 nodep = NULL;
639
640 nodep = tmp;
641 reduction_performed = true;
642 continue;
643 }
644
645 /*
646 * When the mask is 0, can reduce the amount of num_after
647 * bits by moving the initial num_after bits into the mask.
648 */
649 if (nodep->mask == 0) {
650 assert(nodep->num_after != 0);
651 assert(nodep->idx + MASK_BITS > nodep->idx);
652
653 nodep->idx += MASK_BITS;
654
655 if (nodep->num_after >= MASK_BITS) {
656 nodep->mask = ~0;
657 nodep->num_after -= MASK_BITS;
658 } else {
659 nodep->mask = (1u << nodep->num_after) - 1;
660 nodep->num_after = 0;
661 }
662
663 reduction_performed = true;
664 continue;
665 }
666
667 /*
668 * 2) Potential reductions between the current and
669 * previous nodes.
670 */
671 prev = node_prev(s, nodep);
672 if (prev) {
673 sparsebit_idx_t prev_highest_bit;
674
675 /* Nodes with no bits set can be removed. */
676 if (prev->mask == 0 && prev->num_after == 0) {
677 node_rm(s, prev);
678
679 reduction_performed = true;
680 continue;
681 }
682
683 /*
684 * All mask bits set and previous node has
685 * adjacent index.
686 */
687 if (nodep->mask + 1 == 0 &&
688 prev->idx + MASK_BITS == nodep->idx) {
689 prev->num_after += MASK_BITS + nodep->num_after;
690 nodep->mask = 0;
691 nodep->num_after = 0;
692
693 reduction_performed = true;
694 continue;
695 }
696
697 /*
698 * Is node adjacent to previous node and the node
699 * contains a single contiguous range of bits
700 * starting from the beginning of the mask?
701 */
702 prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
703 if (prev_highest_bit + 1 == nodep->idx &&
704 (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
705 /*
706 * How many contiguous bits are there?
707 * Is equal to the total number of set
708 * bits, due to an earlier check that
709 * there is a single contiguous range of
710 * set bits.
711 */
712 unsigned int num_contiguous
713 = __builtin_popcount(nodep->mask);
714 assert((num_contiguous > 0) &&
715 ((1ULL << num_contiguous) - 1) == nodep->mask);
716
717 prev->num_after += num_contiguous;
718 nodep->mask = 0;
719
720 /*
721 * For predictable performance, handle special
722 * case where all mask bits are set and there
723 * is a non-zero num_after setting. This code
724 * is functionally correct without the following
725 * conditionalized statements, but without them
726 * the value of num_after is only reduced by
727 * the number of mask bits per pass. There are
728 * cases where num_after can be close to 2^64.
729 * Without this code it could take nearly
730 * (2^64) / 32 passes to perform the full
731 * reduction.
732 */
733 if (num_contiguous == MASK_BITS) {
734 prev->num_after += nodep->num_after;
735 nodep->num_after = 0;
736 }
737
738 reduction_performed = true;
739 continue;
740 }
741 }
742
743 /*
744 * 3) Potential reductions between the current and
745 * next nodes.
746 */
747 next = node_next(s, nodep);
748 if (next) {
749 /* Nodes with no bits set can be removed. */
750 if (next->mask == 0 && next->num_after == 0) {
751 node_rm(s, next);
752 reduction_performed = true;
753 continue;
754 }
755
756 /*
757 * Is next node index adjacent to current node
758 * and has a mask with all bits set?
759 */
760 if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
761 next->mask == ~(mask_t) 0) {
762 nodep->num_after += MASK_BITS;
763 next->mask = 0;
764 nodep->num_after += next->num_after;
765 next->num_after = 0;
766
767 node_rm(s, next);
768 next = NULL;
769
770 reduction_performed = true;
771 continue;
772 }
773 }
774 } while (nodep && reduction_performed);
775}
776
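/*
 * Editorial sketch (not part of the original patch), illustrating
 * node_reduce() under the assumption MASK_BITS == 32: given
 *
 *	prev  = { idx = 0,  mask = ~0, num_after = 0 }
 *	nodep = { idx = 32, mask = ~0, num_after = 10 }
 *
 * the first pass folds nodep into prev (prev->num_after becomes 42),
 * and the next pass removes the now-empty nodep, leaving the single
 * node { idx = 0, mask = ~0, num_after = 42 } that describes the same
 * 74 set bits.
 */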
777/* Returns whether the bit at the index given by idx, within the
778 * sparsebit array is set or not.
779 */
780bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
781{
782 struct node *nodep;
783
784 /* Find the node that describes the setting of the bit at idx */
785 for (nodep = s->root; nodep;
786 nodep = nodep->idx > idx ? nodep->left : nodep->right)
787 if (idx >= nodep->idx &&
788 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
789 goto have_node;
790
791 return false;
792
793have_node:
794 /* Bit is set if it is any of the bits described by num_after */
795 if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
796 return true;
797
798 /* Is the corresponding mask bit set */
799 assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
800 return !!(nodep->mask & (1 << (idx - nodep->idx)));
801}
802
803/* Within the sparsebit array pointed to by s, sets the bit
804 * at the index given by idx.
805 */
806static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
807{
808 struct node *nodep;
809
810 /* Skip bits that are already set */
811 if (sparsebit_is_set(s, idx))
812 return;
813
814 /*
815 * Get a node where the bit at idx is described by the mask.
816 * The node_split will also create a node, if there isn't
817	 * already a node that describes the setting of the bit.
818 */
819 nodep = node_split(s, idx & -MASK_BITS);
820
821 /* Set the bit within the nodes mask */
822 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
823 assert(!(nodep->mask & (1 << (idx - nodep->idx))));
824 nodep->mask |= 1 << (idx - nodep->idx);
825 s->num_set++;
826
827 node_reduce(s, nodep);
828}
829
830/* Within the sparsebit array pointed to by s, clears the bit
831 * at the index given by idx.
832 */
833static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
834{
835 struct node *nodep;
836
837 /* Skip bits that are already cleared */
838 if (!sparsebit_is_set(s, idx))
839 return;
840
841 /* Is there a node that describes the setting of this bit? */
842 nodep = node_find(s, idx);
843 if (!nodep)
844 return;
845
846 /*
847 * If a num_after bit, split the node, so that the bit is
848 * part of a node mask.
849 */
850 if (idx >= nodep->idx + MASK_BITS)
851 nodep = node_split(s, idx & -MASK_BITS);
852
853 /*
854 * After node_split above, bit at idx should be within the mask.
855 * Clear that bit.
856 */
857 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
858 assert(nodep->mask & (1 << (idx - nodep->idx)));
859 nodep->mask &= ~(1 << (idx - nodep->idx));
860 assert(s->num_set > 0 || sparsebit_all_set(s));
861 s->num_set--;
862
863 node_reduce(s, nodep);
864}
865
866/* Recursively dumps to the FILE stream given by stream the contents
867 * of the sub-tree of nodes pointed to by nodep. Each line of output
868 * is prefixed by the number of spaces given by indent. On each
869 * recursion, the indent amount is increased by 2. This causes nodes
870 * at each level deeper into the binary search tree to be displayed
871 * with a greater indent.
872 */
873static void dump_nodes(FILE *stream, struct node *nodep,
874 unsigned int indent)
875{
876 char *node_type;
877
878 /* Dump contents of node */
879 if (!nodep->parent)
880 node_type = "root";
881 else if (nodep == nodep->parent->left)
882 node_type = "left";
883 else {
884 assert(nodep == nodep->parent->right);
885 node_type = "right";
886 }
887 fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
888 fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
889 nodep->parent, nodep->left, nodep->right);
890 fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
891 indent, "", nodep->idx, nodep->mask, nodep->num_after);
892
893 /* If present, dump contents of left child nodes */
894 if (nodep->left)
895 dump_nodes(stream, nodep->left, indent + 2);
896
897 /* If present, dump contents of right child nodes */
898 if (nodep->right)
899 dump_nodes(stream, nodep->right, indent + 2);
900}
901
902static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
903{
904 mask_t leading = (mask_t)1 << start;
905 int n1 = __builtin_ctz(nodep->mask & -leading);
906
907 return nodep->idx + n1;
908}
909
910static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
911{
912 mask_t leading = (mask_t)1 << start;
913 int n1 = __builtin_ctz(~nodep->mask & -leading);
914
915 return nodep->idx + n1;
916}
917
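/*
 * Editorial note (not part of the original patch): in the two helpers
 * above, -leading is the two's complement of (1 << start), i.e. a value
 * with every bit at position >= start set. ANDing it with the mask (or
 * its complement) discards the bits below start, and __builtin_ctz()
 * then returns the position of the lowest surviving bit. For example,
 * with mask = 0xa and start = 2, mask & -(1 << 2) is 0x8 and ctz()
 * yields 3. Callers must guarantee such a bit exists, because
 * __builtin_ctz(0) is undefined.
 */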
918/* Dumps to the FILE stream specified by stream, the implementation dependent
919 * internal state of s. Each line of output is prefixed with the number
920 * of spaces given by indent. The output is completely implementation
921 * dependent and subject to change. Output from this function should only
922 * be used for diagnostic purposes. For example, this function can be
923 * used by test cases after they detect an unexpected condition, as a means
924 * to capture diagnostic information.
925 */
926static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
927 unsigned int indent)
928{
929 /* Dump the contents of s */
930 fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
931 fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
932
933 if (s->root)
934 dump_nodes(stream, s->root, indent);
935}
936
937/* Allocates and returns a new sparsebit array. The initial state
938 * of the newly allocated sparsebit array has all bits cleared.
939 */
940struct sparsebit *sparsebit_alloc(void)
941{
942 struct sparsebit *s;
943
944 /* Allocate top level structure. */
945 s = calloc(1, sizeof(*s));
946 if (!s) {
947 perror("calloc");
948 abort();
949 }
950
951 return s;
952}
953
954/* Frees the implementation dependent data for the sparsebit array
955 * pointed to by s and poisons the pointer to that data.
956 */
957void sparsebit_free(struct sparsebit **sbitp)
958{
959 struct sparsebit *s = *sbitp;
960
961 if (!s)
962 return;
963
964 sparsebit_clear_all(s);
965 free(s);
966 *sbitp = NULL;
967}
968
969/* Makes a copy of the sparsebit array given by s, to the sparsebit
970 * array given by d. Note, d must have already been allocated via
971 * sparsebit_alloc(). It may, however, already have bits set; any
972 * previous contents of d are discarded by the copy.
973 */
974void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
975{
976 /* First clear any bits already set in the destination */
977 sparsebit_clear_all(d);
978
979 if (s->root) {
980 d->root = node_copy_subtree(s->root);
981 d->num_set = s->num_set;
982 }
983}
984
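/*
 * Editorial sketch (not part of the original patch): the expected
 * alloc/copy/free lifecycle, using only functions defined in this file:
 *
 *	struct sparsebit *src = sparsebit_alloc();
 *	struct sparsebit *dst = sparsebit_alloc();
 *
 *	sparsebit_set_num(src, 0x1000, 16);
 *	sparsebit_copy(dst, src);
 *	assert(sparsebit_is_set_num(dst, 0x1000, 16));
 *
 *	sparsebit_free(&src);
 *	sparsebit_free(&dst);
 */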
985/* Returns whether num consecutive bits starting at idx are all set. */
986bool sparsebit_is_set_num(struct sparsebit *s,
987 sparsebit_idx_t idx, sparsebit_num_t num)
988{
989 sparsebit_idx_t next_cleared;
990
991 assert(num > 0);
992 assert(idx + num - 1 >= idx);
993
994 /* With num > 0, the first bit must be set. */
995 if (!sparsebit_is_set(s, idx))
996 return false;
997
998 /* Find the next cleared bit */
999 next_cleared = sparsebit_next_clear(s, idx);
1000
1001 /*
1002 * If no cleared bits beyond idx, then there are at least num
1003 * set bits. idx + num doesn't wrap. Otherwise check if
1004 * there are enough set bits between idx and the next cleared bit.
1005 */
1006 return next_cleared == 0 || next_cleared - idx >= num;
1007}
1008
1009/* Returns whether the bit at the index given by idx is cleared. */
1010bool sparsebit_is_clear(struct sparsebit *s,
1011 sparsebit_idx_t idx)
1012{
1013 return !sparsebit_is_set(s, idx);
1014}
1015
1016/* Returns whether num consecutive bits starting at idx are all cleared. */
1017bool sparsebit_is_clear_num(struct sparsebit *s,
1018 sparsebit_idx_t idx, sparsebit_num_t num)
1019{
1020 sparsebit_idx_t next_set;
1021
1022 assert(num > 0);
1023 assert(idx + num - 1 >= idx);
1024
1025 /* With num > 0, the first bit must be cleared. */
1026 if (!sparsebit_is_clear(s, idx))
1027 return false;
1028
1029 /* Find the next set bit */
1030 next_set = sparsebit_next_set(s, idx);
1031
1032 /*
1033 * If no set bits beyond idx, then there are at least num
1034 * cleared bits. idx + num doesn't wrap. Otherwise check if
1035 * there are enough cleared bits between idx and the next set bit.
1036 */
1037 return next_set == 0 || next_set - idx >= num;
1038}
1039
1040/* Returns the total number of bits set. Note: 0 is also returned for
1041 * the case of all bits set. This is because with all bits set, there
1042 * is 1 additional bit set beyond what can be represented in the return
1043 * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
1044 * to determine if the sparsebit array has any bits set.
1045 */
1046sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
1047{
1048 return s->num_set;
1049}
1050
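/*
 * Editorial sketch (not part of the original patch): the all-bits-set
 * corner case described above, in practice:
 *
 *	struct sparsebit *s = sparsebit_alloc();
 *
 *	sparsebit_set_all(s);
 *	assert(sparsebit_num_set(s) == 0);
 *	assert(sparsebit_any_set(s));
 *	sparsebit_free(&s);
 *
 * The count wraps to 0 even though every bit is set, which is why
 * emptiness must be tested with sparsebit_any_set() rather than with
 * the returned count.
 */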
1051/* Returns whether any bit is set in the sparsebit array. */
1052bool sparsebit_any_set(struct sparsebit *s)
1053{
1054 /*
1055 * Nodes only describe set bits. If any nodes then there
1056 * is at least 1 bit set.
1057 */
1058 if (!s->root)
1059 return false;
1060
1061 /*
1062 * Every node should have a non-zero mask. For now will
1063 * just assure that the root node has a non-zero mask,
1064 * which is a quick check that at least 1 bit is set.
1065 */
1066 assert(s->root->mask != 0);
1067 assert(s->num_set > 0 ||
1068 (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
1069 s->root->mask == ~(mask_t) 0));
1070
1071 return true;
1072}
1073
1074/* Returns whether all the bits in the sparsebit array are cleared. */
1075bool sparsebit_all_clear(struct sparsebit *s)
1076{
1077 return !sparsebit_any_set(s);
1078}
1079
1080/* Returns whether all the bits in the sparsebit array are set. */
1081bool sparsebit_any_clear(struct sparsebit *s)
1082{
1083 return !sparsebit_all_set(s);
1084}
1085
1086/* Returns the index of the first set bit. Aborts if no bits are set.
1087 */
1088sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
1089{
1090 struct node *nodep;
1091
1092 /* Validate at least 1 bit is set */
1093 assert(sparsebit_any_set(s));
1094
1095 nodep = node_first(s);
1096 return node_first_set(nodep, 0);
1097}
1098
1099/* Returns the index of the first cleared bit. Aborts if
1100 * no bits are cleared.
1101 */
1102sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
1103{
1104 struct node *nodep1, *nodep2;
1105
1106 /* Validate at least 1 bit is cleared. */
1107 assert(sparsebit_any_clear(s));
1108
1109 /* If no nodes or first node index > 0 then lowest cleared is 0 */
1110 nodep1 = node_first(s);
1111 if (!nodep1 || nodep1->idx > 0)
1112 return 0;
1113
1114 /* Does the mask in the first node contain any cleared bits. */
1115 if (nodep1->mask != ~(mask_t) 0)
1116 return node_first_clear(nodep1, 0);
1117
1118 /*
1119 * All mask bits set in first node. If there isn't a second node
1120 * then the first cleared bit is the first bit after the bits
1121 * described by the first node.
1122 */
1123 nodep2 = node_next(s, nodep1);
1124 if (!nodep2) {
1125 /*
1126 * No second node. First cleared bit is first bit beyond
1127 * bits described by first node.
1128 */
1129 assert(nodep1->mask == ~(mask_t) 0);
1130 assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
1131 return nodep1->idx + MASK_BITS + nodep1->num_after;
1132 }
1133
1134 /*
1135 * There is a second node.
1136 * If it is not adjacent to the first node, then there is a gap
1137 * of cleared bits between the nodes, and the first cleared bit
1138 * is the first bit within the gap.
1139 */
1140 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1141 return nodep1->idx + MASK_BITS + nodep1->num_after;
1142
1143 /*
1144 * Second node is adjacent to the first node.
1145 * Because it is adjacent, its mask should be non-zero. If all
1146 * its mask bits are set, then with it being adjacent, it should
1147 * have had the mask bits moved into the num_after setting of the
1148 * previous node.
1149 */
1150 return node_first_clear(nodep2, 0);
1151}
1152
1153/* Returns index of next bit set within s after the index given by prev.
1154 * Returns 0 if there are no bits after prev that are set.
1155 */
1156sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
1157 sparsebit_idx_t prev)
1158{
1159 sparsebit_idx_t lowest_possible = prev + 1;
1160 sparsebit_idx_t start;
1161 struct node *nodep;
1162
1163 /* A bit after the highest index can't be set. */
1164 if (lowest_possible == 0)
1165 return 0;
1166
1167 /*
1168 * Find the leftmost 'candidate' overlapping or to the right
1169 * of lowest_possible.
1170 */
1171 struct node *candidate = NULL;
1172
1173 /* True iff lowest_possible is within candidate */
1174 bool contains = false;
1175
1176 /*
1177 * Find node that describes setting of bit at lowest_possible.
1178 * If such a node doesn't exist, find the node with the lowest
1179 * starting index that is > lowest_possible.
1180 */
1181 for (nodep = s->root; nodep;) {
1182 if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
1183 >= lowest_possible) {
1184 candidate = nodep;
1185 if (candidate->idx <= lowest_possible) {
1186 contains = true;
1187 break;
1188 }
1189 nodep = nodep->left;
1190 } else {
1191 nodep = nodep->right;
1192 }
1193 }
1194 if (!candidate)
1195 return 0;
1196
1197 assert(candidate->mask != 0);
1198
1199 /* Does the candidate node describe the setting of lowest_possible? */
1200 if (!contains) {
1201 /*
1202 * Candidate doesn't describe setting of bit at lowest_possible.
1203 * Candidate points to the first node with a starting index
1204 * > lowest_possible.
1205 */
1206 assert(candidate->idx > lowest_possible);
1207
1208 return node_first_set(candidate, 0);
1209 }
1210
1211 /*
1212 * Candidate describes setting of bit at lowest_possible.
1213 * Note: although the node describes the setting of the bit
1214	 * at lowest_possible, it's possible that its setting and the
1215	 * setting of all later bits described by this node are 0.
1216 * For now, just handle the cases where this node describes
1217 * a bit at or after an index of lowest_possible that is set.
1218 */
1219 start = lowest_possible - candidate->idx;
1220
1221 if (start < MASK_BITS && candidate->mask >= (1 << start))
1222 return node_first_set(candidate, start);
1223
1224 if (candidate->num_after) {
1225 sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
1226
1227 return lowest_possible < first_num_after_idx
1228 ? first_num_after_idx : lowest_possible;
1229 }
1230
1231 /*
1232 * Although candidate node describes setting of bit at
1233 * the index of lowest_possible, all bits at that index and
1234	 * later that are described by candidate are cleared. With
1235 * this, the next bit is the first bit in the next node, if
1236 * such a node exists. If a next node doesn't exist, then
1237 * there is no next set bit.
1238 */
1239 candidate = node_next(s, candidate);
1240 if (!candidate)
1241 return 0;
1242
1243 return node_first_set(candidate, 0);
1244}
1245
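/*
 * Editorial sketch (not part of the original patch): because a return
 * value of 0 means "no further set bits", a walk over every set bit
 * has to treat index 0 separately:
 *
 *	sparsebit_idx_t idx;
 *
 *	if (sparsebit_is_set(s, 0))
 *		visit(0);
 *	for (idx = sparsebit_next_set(s, 0); idx;
 *	     idx = sparsebit_next_set(s, idx))
 *		visit(idx);
 *
 * visit() is a placeholder for whatever per-bit work the caller needs.
 */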
1246/* Returns index of next bit cleared within s after the index given by prev.
1247 * Returns 0 if there are no bits after prev that are cleared.
1248 */
1249sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
1250 sparsebit_idx_t prev)
1251{
1252 sparsebit_idx_t lowest_possible = prev + 1;
1253 sparsebit_idx_t idx;
1254 struct node *nodep1, *nodep2;
1255
1256	/* There are no bits after the highest index. */
1257 if (lowest_possible == 0)
1258 return 0;
1259
1260 /*
1261 * Does a node describing the setting of lowest_possible exist?
1262 * If not, the bit at lowest_possible is cleared.
1263 */
1264 nodep1 = node_find(s, lowest_possible);
1265 if (!nodep1)
1266 return lowest_possible;
1267
1268 /* Does a mask bit in node 1 describe the next cleared bit. */
1269 for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
1270 if (!(nodep1->mask & (1 << idx)))
1271 return nodep1->idx + idx;
1272
1273 /*
1274 * Next cleared bit is not described by node 1. If there
1275 * isn't a next node, then next cleared bit is described
1276 * by bit after the bits described by the first node.
1277 */
1278 nodep2 = node_next(s, nodep1);
1279 if (!nodep2)
1280 return nodep1->idx + MASK_BITS + nodep1->num_after;
1281
1282 /*
1283 * There is a second node.
1284 * If it is not adjacent to the first node, then there is a gap
1285 * of cleared bits between the nodes, and the next cleared bit
1286 * is the first bit within the gap.
1287 */
1288 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1289 return nodep1->idx + MASK_BITS + nodep1->num_after;
1290
1291 /*
1292 * Second node is adjacent to the first node.
1293 * Because it is adjacent, its mask should be non-zero. If all
1294 * its mask bits are set, then with it being adjacent, it should
1295 * have had the mask bits moved into the num_after setting of the
1296 * previous node.
1297 */
1298 return node_first_clear(nodep2, 0);
1299}
1300
1301/* Starting with the index 1 greater than the index given by start, finds
1302 * and returns the index of the first sequence of num consecutively set
1303 * bits. Returns a value of 0 if no such sequence exists.
1304 */
1305sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
1306 sparsebit_idx_t start, sparsebit_num_t num)
1307{
1308 sparsebit_idx_t idx;
1309
1310 assert(num >= 1);
1311
1312 for (idx = sparsebit_next_set(s, start);
1313 idx != 0 && idx + num - 1 >= idx;
1314 idx = sparsebit_next_set(s, idx)) {
1315 assert(sparsebit_is_set(s, idx));
1316
1317 /*
1318 * Does the sequence of bits starting at idx consist of
1319 * num set bits?
1320 */
1321 if (sparsebit_is_set_num(s, idx, num))
1322 return idx;
1323
1324 /*
1325 * Sequence of set bits at idx isn't large enough.
1326 * Skip this entire sequence of set bits.
1327 */
1328 idx = sparsebit_next_clear(s, idx);
1329 if (idx == 0)
1330 return 0;
1331 }
1332
1333 return 0;
1334}
1335
1336/* Starting with the index 1 greater than the index given by start, finds
1337 * and returns the index of the first sequence of num consecutively cleared
1338 * bits. Returns a value of 0 if no such sequence exists.
1339 */
1340sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
1341 sparsebit_idx_t start, sparsebit_num_t num)
1342{
1343 sparsebit_idx_t idx;
1344
1345 assert(num >= 1);
1346
1347 for (idx = sparsebit_next_clear(s, start);
1348 idx != 0 && idx + num - 1 >= idx;
1349 idx = sparsebit_next_clear(s, idx)) {
1350 assert(sparsebit_is_clear(s, idx));
1351
1352 /*
1353 * Does the sequence of bits starting at idx consist of
1354 * num cleared bits?
1355 */
1356 if (sparsebit_is_clear_num(s, idx, num))
1357 return idx;
1358
1359 /*
1360 * Sequence of cleared bits at idx isn't large enough.
1361 * Skip this entire sequence of cleared bits.
1362 */
1363 idx = sparsebit_next_set(s, idx);
1364 if (idx == 0)
1365 return 0;
1366 }
1367
1368 return 0;
1369}
1370
1371/* Sets the bits in the inclusive range start through start + num - 1. */
1372void sparsebit_set_num(struct sparsebit *s,
1373 sparsebit_idx_t start, sparsebit_num_t num)
1374{
1375 struct node *nodep, *next;
1376 unsigned int n1;
1377 sparsebit_idx_t idx;
1378 sparsebit_num_t n;
1379 sparsebit_idx_t middle_start, middle_end;
1380
1381 assert(num > 0);
1382 assert(start + num - 1 >= start);
1383
1384 /*
1385 * Leading - bits before first mask boundary.
1386 *
1387 * TODO(lhuemill): With some effort it may be possible to
1388	 * replace the following loop with a straight-line sequence
1389	 * of statements. The high-level sequence would be:
1390 *
1391 * 1. Use node_split() to force node that describes setting
1392 * of idx to be within the mask portion of a node.
1393 * 2. Form mask of bits to be set.
1394 * 3. Determine number of mask bits already set in the node
1395 * and store in a local variable named num_already_set.
1396 * 4. Set the appropriate mask bits within the node.
1397	 *   5. Increment the sparsebit array's num_set member
1398 * by the number of bits that were actually set.
1399 * Exclude from the counts bits that were already set.
1400 * 6. Before returning to the caller, use node_reduce() to
1401 * handle the multiple corner cases that this method
1402 * introduces.
1403 */
1404 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1405 bit_set(s, idx);
1406
1407 /* Middle - bits spanning one or more entire mask */
1408 middle_start = idx;
1409 middle_end = middle_start + (n & -MASK_BITS) - 1;
1410 if (n >= MASK_BITS) {
1411 nodep = node_split(s, middle_start);
1412
1413 /*
1414 * As needed, split just after end of middle bits.
1415 * No split needed if end of middle bits is at highest
1416 * supported bit index.
1417 */
1418 if (middle_end + 1 > middle_end)
1419 (void) node_split(s, middle_end + 1);
1420
1421 /* Delete nodes that only describe bits within the middle. */
1422 for (next = node_next(s, nodep);
1423 next && (next->idx < middle_end);
1424 next = node_next(s, nodep)) {
1425 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1426 node_rm(s, next);
1427 next = NULL;
1428 }
1429
1430 /* As needed set each of the mask bits */
1431 for (n1 = 0; n1 < MASK_BITS; n1++) {
1432 if (!(nodep->mask & (1 << n1))) {
1433 nodep->mask |= 1 << n1;
1434 s->num_set++;
1435 }
1436 }
1437
1438 s->num_set -= nodep->num_after;
1439 nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
1440 s->num_set += nodep->num_after;
1441
1442 node_reduce(s, nodep);
1443 }
1444 idx = middle_end + 1;
1445 n -= middle_end - middle_start + 1;
1446
1447 /* Trailing - bits at and beyond last mask boundary */
1448 assert(n < MASK_BITS);
1449 for (; n > 0; idx++, n--)
1450 bit_set(s, idx);
1451}
1452
1453/* Clears the bits in the inclusive range start through start + num - 1. */
1454void sparsebit_clear_num(struct sparsebit *s,
1455 sparsebit_idx_t start, sparsebit_num_t num)
1456{
1457 struct node *nodep, *next;
1458 unsigned int n1;
1459 sparsebit_idx_t idx;
1460 sparsebit_num_t n;
1461 sparsebit_idx_t middle_start, middle_end;
1462
1463 assert(num > 0);
1464 assert(start + num - 1 >= start);
1465
1466 /* Leading - bits before first mask boundary */
1467 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1468 bit_clear(s, idx);
1469
1470 /* Middle - bits spanning one or more entire mask */
1471 middle_start = idx;
1472 middle_end = middle_start + (n & -MASK_BITS) - 1;
1473 if (n >= MASK_BITS) {
1474 nodep = node_split(s, middle_start);
1475
1476 /*
1477 * As needed, split just after end of middle bits.
1478 * No split needed if end of middle bits is at highest
1479 * supported bit index.
1480 */
1481 if (middle_end + 1 > middle_end)
1482 (void) node_split(s, middle_end + 1);
1483
1484 /* Delete nodes that only describe bits within the middle. */
1485 for (next = node_next(s, nodep);
1486 next && (next->idx < middle_end);
1487 next = node_next(s, nodep)) {
1488 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1489 node_rm(s, next);
1490 next = NULL;
1491 }
1492
1493 /* As needed clear each of the mask bits */
1494 for (n1 = 0; n1 < MASK_BITS; n1++) {
1495 if (nodep->mask & (1 << n1)) {
1496 nodep->mask &= ~(1 << n1);
1497 s->num_set--;
1498 }
1499 }
1500
1501 /* Clear any bits described by num_after */
1502 s->num_set -= nodep->num_after;
1503 nodep->num_after = 0;
1504
1505 /*
1506 * Delete the node that describes the beginning of
1507 * the middle bits and perform any allowed reductions
1508 * with the nodes prev or next of nodep.
1509 */
1510 node_reduce(s, nodep);
1511 nodep = NULL;
1512 }
1513 idx = middle_end + 1;
1514 n -= middle_end - middle_start + 1;
1515
1516 /* Trailing - bits at and beyond last mask boundary */
1517 assert(n < MASK_BITS);
1518 for (; n > 0; idx++, n--)
1519 bit_clear(s, idx);
1520}
1521
1522/* Sets the bit at the index given by idx. */
1523void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
1524{
1525 sparsebit_set_num(s, idx, 1);
1526}
1527
1528/* Clears the bit at the index given by idx. */
1529void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
1530{
1531 sparsebit_clear_num(s, idx, 1);
1532}
1533
1534/* Sets the bits in the entire addressable range of the sparsebit array. */
1535void sparsebit_set_all(struct sparsebit *s)
1536{
1537 sparsebit_set(s, 0);
1538 sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
1539 assert(sparsebit_all_set(s));
1540}
1541
1542/* Clears the bits in the entire addressable range of the sparsebit array. */
1543void sparsebit_clear_all(struct sparsebit *s)
1544{
1545 sparsebit_clear(s, 0);
1546 sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
1547 assert(!sparsebit_any_set(s));
1548}
1549
1550static size_t display_range(FILE *stream, sparsebit_idx_t low,
1551 sparsebit_idx_t high, bool prepend_comma_space)
1552{
1553 char *fmt_str;
1554 size_t sz;
1555
1556 /* Determine the printf format string */
1557 if (low == high)
1558 fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
1559 else
1560 fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
1561
1562 /*
1563 * When stream is NULL, just determine the size of what would
1564 * have been printed, else print the range.
1565 */
1566 if (!stream)
1567 sz = snprintf(NULL, 0, fmt_str, low, high);
1568 else
1569 sz = fprintf(stream, fmt_str, low, high);
1570
1571 return sz;
1572}
1573
1574
1575/* Dumps to the FILE stream given by stream, the bit settings
1576 * of s. Each line of output is prefixed with the number of
1577 * spaces given by indent. The length of each line is implementation
1578 * dependent and does not depend on the indent amount. The following
1579 * is an example output of a sparsebit array that has bits:
1580 *
1581 * 0x5, 0x8, 0xa:0xe, 0x12
1582 *
1583 * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
1584 * are set. Note that a ':', instead of a '-' is used to specify a range of
1585 * contiguous bits. This is done because '-' is used to specify command-line
1586 * options, and sometimes ranges are specified as command-line arguments.
1587 */
1588void sparsebit_dump(FILE *stream, struct sparsebit *s,
1589 unsigned int indent)
1590{
1591 size_t current_line_len = 0;
1592 size_t sz;
1593 struct node *nodep;
1594
1595 if (!sparsebit_any_set(s))
1596 return;
1597
1598 /* Display initial indent */
1599 fprintf(stream, "%*s", indent, "");
1600
1601 /* For each node */
1602 for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
1603 unsigned int n1;
1604 sparsebit_idx_t low, high;
1605
1606 /* For each group of bits in the mask */
1607 for (n1 = 0; n1 < MASK_BITS; n1++) {
1608 if (nodep->mask & (1 << n1)) {
1609 low = high = nodep->idx + n1;
1610
1611 for (; n1 < MASK_BITS; n1++) {
1612 if (nodep->mask & (1 << n1))
1613 high = nodep->idx + n1;
1614 else
1615 break;
1616 }
1617
1618 if ((n1 == MASK_BITS) && nodep->num_after)
1619 high += nodep->num_after;
1620
1621 /*
1622 * How much room will it take to display
1623 * this range.
1624 */
1625 sz = display_range(NULL, low, high,
1626 current_line_len != 0);
1627
1628 /*
1629 * If there is not enough room, display
1630 * a newline plus the indent of the next
1631 * line.
1632 */
1633 if (current_line_len + sz > DUMP_LINE_MAX) {
1634 fputs("\n", stream);
1635 fprintf(stream, "%*s", indent, "");
1636 current_line_len = 0;
1637 }
1638
1639 /* Display the range */
1640 sz = display_range(stream, low, high,
1641 current_line_len != 0);
1642 current_line_len += sz;
1643 }
1644 }
1645
1646 /*
1647		 * If num_after is non-zero and the most-significant bit of
1648		 * the mask is not set, then we still need to display a range
1649		 * for the bits described by num_after.
1650 */
1651 if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
1652 low = nodep->idx + MASK_BITS;
1653 high = nodep->idx + MASK_BITS + nodep->num_after - 1;
1654
1655 /*
1656 * How much room will it take to display
1657 * this range.
1658 */
1659 sz = display_range(NULL, low, high,
1660 current_line_len != 0);
1661
1662 /*
1663 * If there is not enough room, display
1664 * a newline plus the indent of the next
1665 * line.
1666 */
1667 if (current_line_len + sz > DUMP_LINE_MAX) {
1668 fputs("\n", stream);
1669 fprintf(stream, "%*s", indent, "");
1670 current_line_len = 0;
1671 }
1672
1673 /* Display the range */
1674 sz = display_range(stream, low, high,
1675 current_line_len != 0);
1676 current_line_len += sz;
1677 }
1678 }
1679 fputs("\n", stream);
1680}
1681
1682/* Validates the internal state of the sparsebit array given by
1683 * s. On error, diagnostic information is printed to stderr and
1684 * abort is called.
1685 */
1686void sparsebit_validate_internal(struct sparsebit *s)
1687{
1688 bool error_detected = false;
1689 struct node *nodep, *prev = NULL;
1690 sparsebit_num_t total_bits_set = 0;
1691 unsigned int n1;
1692
1693 /* For each node */
1694 for (nodep = node_first(s); nodep;
1695 prev = nodep, nodep = node_next(s, nodep)) {
1696
1697 /*
1698 * Increase total bits set by the number of bits set
1699 * in this node.
1700 */
1701 for (n1 = 0; n1 < MASK_BITS; n1++)
1702 if (nodep->mask & (1 << n1))
1703 total_bits_set++;
1704
1705 total_bits_set += nodep->num_after;
1706
1707 /*
1708 * Arbitrary choice as to whether a mask of 0 is allowed
1709 * or not. For diagnostic purposes it is beneficial to
1710 * have only one valid means to represent a set of bits.
1711 * To support this an arbitrary choice has been made
1712 * to not allow a mask of zero.
1713 */
1714 if (nodep->mask == 0) {
1715 fprintf(stderr, "Node mask of zero, "
1716 "nodep: %p nodep->mask: 0x%x",
1717 nodep, nodep->mask);
1718 error_detected = true;
1719 break;
1720 }
1721
1722 /*
1723 * Validate num_after is not greater than the max index
1724 * - the number of mask bits. The num_after member
1725 * uses 0-based indexing and thus has no value that
1726 * represents all bits set. This limitation is handled
1727 * by requiring a non-zero mask. With a non-zero mask,
1728 * MASK_BITS worth of bits are described by the mask,
1729 * which makes the largest needed num_after equal to:
1730 *
1731 * (~(sparsebit_num_t) 0) - MASK_BITS + 1
1732 */
1733 if (nodep->num_after
1734 > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
1735 fprintf(stderr, "num_after too large, "
1736 "nodep: %p nodep->num_after: 0x%lx",
1737 nodep, nodep->num_after);
1738 error_detected = true;
1739 break;
1740 }
1741
1742 /* Validate node index is divisible by the mask size */
1743 if (nodep->idx % MASK_BITS) {
1744			fprintf(stderr, "Node index not divisible by "
1745 "mask size,\n"
1746 " nodep: %p nodep->idx: 0x%lx "
1747 "MASK_BITS: %lu\n",
1748 nodep, nodep->idx, MASK_BITS);
1749 error_detected = true;
1750 break;
1751 }
1752
1753 /*
1754 * Validate bits described by node don't wrap beyond the
1755 * highest supported index.
1756 */
1757 if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
1758 fprintf(stderr, "Bits described by node wrap "
1759 "beyond highest supported index,\n"
1760 " nodep: %p nodep->idx: 0x%lx\n"
1761 " MASK_BITS: %lu nodep->num_after: 0x%lx",
1762 nodep, nodep->idx, MASK_BITS, nodep->num_after);
1763 error_detected = true;
1764 break;
1765 }
1766
1767 /* Check parent pointers. */
1768 if (nodep->left) {
1769 if (nodep->left->parent != nodep) {
1770 fprintf(stderr, "Left child parent pointer "
1771 "doesn't point to this node,\n"
1772 " nodep: %p nodep->left: %p "
1773 "nodep->left->parent: %p",
1774 nodep, nodep->left,
1775 nodep->left->parent);
1776 error_detected = true;
1777 break;
1778 }
1779 }
1780
1781 if (nodep->right) {
1782 if (nodep->right->parent != nodep) {
1783 fprintf(stderr, "Right child parent pointer "
1784 "doesn't point to this node,\n"
1785 " nodep: %p nodep->right: %p "
1786 "nodep->right->parent: %p",
1787 nodep, nodep->right,
1788 nodep->right->parent);
1789 error_detected = true;
1790 break;
1791 }
1792 }
1793
1794 if (!nodep->parent) {
1795 if (s->root != nodep) {
1796 fprintf(stderr, "Unexpected root node, "
1797 "s->root: %p nodep: %p",
1798 s->root, nodep);
1799 error_detected = true;
1800 break;
1801 }
1802 }
1803
1804 if (prev) {
1805 /*
1806 * Is index of previous node before index of
1807 * current node?
1808 */
1809 if (prev->idx >= nodep->idx) {
1810 fprintf(stderr, "Previous node index "
1811 ">= current node index,\n"
1812 " prev: %p prev->idx: 0x%lx\n"
1813 " nodep: %p nodep->idx: 0x%lx",
1814 prev, prev->idx, nodep, nodep->idx);
1815 error_detected = true;
1816 break;
1817 }
1818
1819 /*
1820			 * Nodes occur in ascending order, based on each
1821			 * node's starting index.
1822 */
1823 if ((prev->idx + MASK_BITS + prev->num_after - 1)
1824 >= nodep->idx) {
1825 fprintf(stderr, "Previous node bit range "
1826 "overlap with current node bit range,\n"
1827 " prev: %p prev->idx: 0x%lx "
1828 "prev->num_after: 0x%lx\n"
1829 " nodep: %p nodep->idx: 0x%lx "
1830 "nodep->num_after: 0x%lx\n"
1831 " MASK_BITS: %lu",
1832 prev, prev->idx, prev->num_after,
1833 nodep, nodep->idx, nodep->num_after,
1834 MASK_BITS);
1835 error_detected = true;
1836 break;
1837 }
1838
1839 /*
1840 * When the node has all mask bits set, it shouldn't
1841 * be adjacent to the last bit described by the
1842 * previous node.
1843 */
1844 if (nodep->mask == ~(mask_t) 0 &&
1845 prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
1846 fprintf(stderr, "Current node has mask with "
1847 "all bits set and is adjacent to the "
1848 "previous node,\n"
1849 " prev: %p prev->idx: 0x%lx "
1850 "prev->num_after: 0x%lx\n"
1851 " nodep: %p nodep->idx: 0x%lx "
1852 "nodep->num_after: 0x%lx\n"
1853 " MASK_BITS: %lu",
1854 prev, prev->idx, prev->num_after,
1855 nodep, nodep->idx, nodep->num_after,
1856 MASK_BITS);
1857
1858 error_detected = true;
1859 break;
1860 }
1861 }
1862 }
1863
1864 if (!error_detected) {
1865 /*
1866 * Is sum of bits set in each node equal to the count
1867 * of total bits set.
1868 */
1869 if (s->num_set != total_bits_set) {
1870			fprintf(stderr, "Number of bits set mismatch,\n"
1871 " s->num_set: 0x%lx total_bits_set: 0x%lx",
1872 s->num_set, total_bits_set);
1873
1874 error_detected = true;
1875 }
1876 }
1877
1878 if (error_detected) {
1879 fputs(" dump_internal:\n", stderr);
1880 sparsebit_dump_internal(stderr, s, 4);
1881 abort();
1882 }
1883}
1884
1885
1886#ifdef FUZZ
1887/* A simple but effective fuzzing driver. Look for bugs with the help
1888 * of some invariants and of a trivial representation of sparsebit.
1889 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
1890 * afl-fuzz do the magic. :)
1891 */
1892
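/*
 * Editorial note (not part of the original patch): one plausible way to
 * drive this harness, assuming the file is compiled stand-alone with
 * -DFUZZ (the exact compiler and afl-fuzz invocations below are
 * illustrative, not taken from the patch):
 *
 *	cc -DFUZZ -g -O1 -Iinclude lib/sparsebit.c -o sparsebit-fuzz
 *	afl-fuzz -i testcases -o findings -- ./sparsebit-fuzz
 *
 * The harness reads 17-byte records from stdin (one opcode byte, of
 * which only the low 4 bits are used, plus two big-endian 64-bit
 * indices) and exits cleanly at EOF, which matches afl-fuzz's
 * stdin-driven execution model.
 */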
1893#include <stdlib.h>
1894#include <assert.h>
1895
1896struct range {
1897 sparsebit_idx_t first, last;
1898 bool set;
1899};
1900
1901struct sparsebit *s;
1902struct range ranges[1000];
1903int num_ranges;
1904
1905static bool get_value(sparsebit_idx_t idx)
1906{
1907 int i;
1908
1909 for (i = num_ranges; --i >= 0; )
1910 if (ranges[i].first <= idx && idx <= ranges[i].last)
1911 return ranges[i].set;
1912
1913 return false;
1914}
1915
1916static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
1917{
1918 sparsebit_num_t num;
1919 sparsebit_idx_t next;
1920
1921 if (first < last) {
1922 num = last - first + 1;
1923 } else {
1924 num = first - last + 1;
1925 first = last;
1926 last = first + num - 1;
1927 }
1928
1929 switch (code) {
1930 case 0:
1931 sparsebit_set(s, first);
1932 assert(sparsebit_is_set(s, first));
1933 assert(!sparsebit_is_clear(s, first));
1934 assert(sparsebit_any_set(s));
1935 assert(!sparsebit_all_clear(s));
1936 if (get_value(first))
1937 return;
1938 if (num_ranges == 1000)
1939 exit(0);
1940 ranges[num_ranges++] = (struct range)
1941 { .first = first, .last = first, .set = true };
1942 break;
1943 case 1:
1944 sparsebit_clear(s, first);
1945 assert(!sparsebit_is_set(s, first));
1946 assert(sparsebit_is_clear(s, first));
1947 assert(sparsebit_any_clear(s));
1948 assert(!sparsebit_all_set(s));
1949 if (!get_value(first))
1950 return;
1951 if (num_ranges == 1000)
1952 exit(0);
1953 ranges[num_ranges++] = (struct range)
1954 { .first = first, .last = first, .set = false };
1955 break;
1956 case 2:
1957 assert(sparsebit_is_set(s, first) == get_value(first));
1958 assert(sparsebit_is_clear(s, first) == !get_value(first));
1959 break;
1960 case 3:
1961 if (sparsebit_any_set(s))
1962 assert(get_value(sparsebit_first_set(s)));
1963 if (sparsebit_any_clear(s))
1964 assert(!get_value(sparsebit_first_clear(s)));
1965 sparsebit_set_all(s);
1966 assert(!sparsebit_any_clear(s));
1967 assert(sparsebit_all_set(s));
1968 num_ranges = 0;
1969 ranges[num_ranges++] = (struct range)
1970 { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
1971 break;
1972 case 4:
1973 if (sparsebit_any_set(s))
1974 assert(get_value(sparsebit_first_set(s)));
1975 if (sparsebit_any_clear(s))
1976 assert(!get_value(sparsebit_first_clear(s)));
1977 sparsebit_clear_all(s);
1978 assert(!sparsebit_any_set(s));
1979 assert(sparsebit_all_clear(s));
1980 num_ranges = 0;
1981 break;
1982 case 5:
1983 next = sparsebit_next_set(s, first);
1984 assert(next == 0 || next > first);
1985 assert(next == 0 || get_value(next));
1986 break;
1987 case 6:
1988 next = sparsebit_next_clear(s, first);
1989 assert(next == 0 || next > first);
1990 assert(next == 0 || !get_value(next));
1991 break;
1992 case 7:
1993 next = sparsebit_next_clear(s, first);
1994 if (sparsebit_is_set_num(s, first, num)) {
1995 assert(next == 0 || next > last);
1996 if (first)
1997 next = sparsebit_next_set(s, first - 1);
1998 else if (sparsebit_any_set(s))
1999 next = sparsebit_first_set(s);
2000 else
2001 return;
2002 assert(next == first);
2003 } else {
2004 assert(sparsebit_is_clear(s, first) || next <= last);
2005 }
2006 break;
2007 case 8:
2008 next = sparsebit_next_set(s, first);
2009 if (sparsebit_is_clear_num(s, first, num)) {
2010 assert(next == 0 || next > last);
2011 if (first)
2012 next = sparsebit_next_clear(s, first - 1);
2013 else if (sparsebit_any_clear(s))
2014 next = sparsebit_first_clear(s);
2015 else
2016 return;
2017 assert(next == first);
2018 } else {
2019 assert(sparsebit_is_set(s, first) || next <= last);
2020 }
2021 break;
2022 case 9:
2023 sparsebit_set_num(s, first, num);
2024 assert(sparsebit_is_set_num(s, first, num));
2025 assert(!sparsebit_is_clear_num(s, first, num));
2026 assert(sparsebit_any_set(s));
2027 assert(!sparsebit_all_clear(s));
2028 if (num_ranges == 1000)
2029 exit(0);
2030 ranges[num_ranges++] = (struct range)
2031 { .first = first, .last = last, .set = true };
2032 break;
2033 case 10:
2034 sparsebit_clear_num(s, first, num);
2035 assert(!sparsebit_is_set_num(s, first, num));
2036 assert(sparsebit_is_clear_num(s, first, num));
2037 assert(sparsebit_any_clear(s));
2038 assert(!sparsebit_all_set(s));
2039 if (num_ranges == 1000)
2040 exit(0);
2041 ranges[num_ranges++] = (struct range)
2042 { .first = first, .last = last, .set = false };
2043 break;
2044 case 11:
2045 sparsebit_validate_internal(s);
2046 break;
2047 default:
2048 break;
2049 }
2050}
2051
2052unsigned char get8(void)
2053{
2054 int ch;
2055
2056 ch = getchar();
2057 if (ch == EOF)
2058 exit(0);
2059 return ch;
2060}
2061
2062uint64_t get64(void)
2063{
2064 uint64_t x;
2065
2066 x = get8();
2067 x = (x << 8) | get8();
2068 x = (x << 8) | get8();
2069 x = (x << 8) | get8();
2070 x = (x << 8) | get8();
2071 x = (x << 8) | get8();
2072 x = (x << 8) | get8();
2073 return (x << 8) | get8();
2074}
2075
2076int main(void)
2077{
2078 s = sparsebit_alloc();
2079 for (;;) {
2080 uint8_t op = get8() & 0xf;
2081 uint64_t first = get64();
2082 uint64_t last = get64();
2083
2084 operate(op, first, last);
2085 }
2086}
2087#endif
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000000..12df46280b23
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,697 @@
1/*
2 * tools/testing/selftests/kvm/lib/x86.c
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 */
8
9#define _GNU_SOURCE /* for program_invocation_name */
10
11#include "test_util.h"
12#include "kvm_util.h"
13#include "kvm_util_internal.h"
14#include "x86.h"
15
16/* Minimum physical address used for virtual translation tables. */
17#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
18
19/* Virtual translation table structure declarations */
20struct pageMapL4Entry {
21 uint64_t present:1;
22 uint64_t writable:1;
23 uint64_t user:1;
24 uint64_t write_through:1;
25 uint64_t cache_disable:1;
26 uint64_t accessed:1;
27 uint64_t ignored_06:1;
28 uint64_t page_size:1;
29 uint64_t ignored_11_08:4;
30 uint64_t address:40;
31 uint64_t ignored_62_52:11;
32 uint64_t execute_disable:1;
33};
34
35struct pageDirectoryPointerEntry {
36 uint64_t present:1;
37 uint64_t writable:1;
38 uint64_t user:1;
39 uint64_t write_through:1;
40 uint64_t cache_disable:1;
41 uint64_t accessed:1;
42 uint64_t ignored_06:1;
43 uint64_t page_size:1;
44 uint64_t ignored_11_08:4;
45 uint64_t address:40;
46 uint64_t ignored_62_52:11;
47 uint64_t execute_disable:1;
48};
49
50struct pageDirectoryEntry {
51 uint64_t present:1;
52 uint64_t writable:1;
53 uint64_t user:1;
54 uint64_t write_through:1;
55 uint64_t cache_disable:1;
56 uint64_t accessed:1;
57 uint64_t ignored_06:1;
58 uint64_t page_size:1;
59 uint64_t ignored_11_08:4;
60 uint64_t address:40;
61 uint64_t ignored_62_52:11;
62 uint64_t execute_disable:1;
63};
64
65struct pageTableEntry {
66 uint64_t present:1;
67 uint64_t writable:1;
68 uint64_t user:1;
69 uint64_t write_through:1;
70 uint64_t cache_disable:1;
71 uint64_t accessed:1;
72 uint64_t dirty:1;
73 uint64_t reserved_07:1;
74 uint64_t global:1;
75 uint64_t ignored_11_09:3;
76 uint64_t address:40;
77 uint64_t ignored_62_52:11;
78 uint64_t execute_disable:1;
79};
80
81/* Register Dump
82 *
83 * Input Args:
84 * indent - Left margin indent amount
85 * regs - register
86 *
87 * Output Args:
88 * stream - Output FILE stream
89 *
90 * Return: None
91 *
92 * Dumps the state of the registers given by regs, to the FILE stream
93 * given by stream.
94 */
95void regs_dump(FILE *stream, struct kvm_regs *regs,
96 uint8_t indent)
97{
98 fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
99 "rcx: 0x%.16llx rdx: 0x%.16llx\n",
100 indent, "",
101 regs->rax, regs->rbx, regs->rcx, regs->rdx);
102 fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
103 "rsp: 0x%.16llx rbp: 0x%.16llx\n",
104 indent, "",
105 regs->rsi, regs->rdi, regs->rsp, regs->rbp);
106 fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
107 "r10: 0x%.16llx r11: 0x%.16llx\n",
108 indent, "",
109 regs->r8, regs->r9, regs->r10, regs->r11);
110 fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
111 "r14: 0x%.16llx r15: 0x%.16llx\n",
112 indent, "",
113 regs->r12, regs->r13, regs->r14, regs->r15);
114 fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
115 indent, "",
116 regs->rip, regs->rflags);
117}
118
119/* Segment Dump
120 *
121 * Input Args:
122 * indent - Left margin indent amount
123 * segment - KVM segment
124 *
125 * Output Args:
126 * stream - Output FILE stream
127 *
128 * Return: None
129 *
130 * Dumps the state of the KVM segment given by segment, to the FILE stream
131 * given by stream.
132 */
133static void segment_dump(FILE *stream, struct kvm_segment *segment,
134 uint8_t indent)
135{
136 fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
137 "selector: 0x%.4x type: 0x%.2x\n",
138 indent, "", segment->base, segment->limit,
139 segment->selector, segment->type);
140 fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
141 "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
142 indent, "", segment->present, segment->dpl,
143 segment->db, segment->s, segment->l);
144 fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
145 "unusable: 0x%.2x padding: 0x%.2x\n",
146 indent, "", segment->g, segment->avl,
147 segment->unusable, segment->padding);
148}
149
150/* dtable Dump
151 *
152 * Input Args:
153 * indent - Left margin indent amount
154 * dtable - KVM dtable
155 *
156 * Output Args:
157 * stream - Output FILE stream
158 *
159 * Return: None
160 *
161 * Dumps the state of the KVM dtable given by dtable, to the FILE stream
162 * given by stream.
163 */
164static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
165 uint8_t indent)
166{
167 fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
168 "padding: 0x%.4x 0x%.4x 0x%.4x\n",
169 indent, "", dtable->base, dtable->limit,
170 dtable->padding[0], dtable->padding[1], dtable->padding[2]);
171}
172
173/* System Register Dump
174 *
175 * Input Args:
176 * indent - Left margin indent amount
177 * sregs - System registers
178 *
179 * Output Args:
180 * stream - Output FILE stream
181 *
182 * Return: None
183 *
184 * Dumps the state of the system registers given by sregs, to the FILE stream
185 * given by stream.
186 */
187void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
188 uint8_t indent)
189{
190 unsigned int i;
191
192 fprintf(stream, "%*scs:\n", indent, "");
193 segment_dump(stream, &sregs->cs, indent + 2);
194 fprintf(stream, "%*sds:\n", indent, "");
195 segment_dump(stream, &sregs->ds, indent + 2);
196 fprintf(stream, "%*ses:\n", indent, "");
197 segment_dump(stream, &sregs->es, indent + 2);
198 fprintf(stream, "%*sfs:\n", indent, "");
199 segment_dump(stream, &sregs->fs, indent + 2);
200 fprintf(stream, "%*sgs:\n", indent, "");
201 segment_dump(stream, &sregs->gs, indent + 2);
202 fprintf(stream, "%*sss:\n", indent, "");
203 segment_dump(stream, &sregs->ss, indent + 2);
204 fprintf(stream, "%*str:\n", indent, "");
205 segment_dump(stream, &sregs->tr, indent + 2);
206 fprintf(stream, "%*sldt:\n", indent, "");
207 segment_dump(stream, &sregs->ldt, indent + 2);
208
209 fprintf(stream, "%*sgdt:\n", indent, "");
210 dtable_dump(stream, &sregs->gdt, indent + 2);
211 fprintf(stream, "%*sidt:\n", indent, "");
212 dtable_dump(stream, &sregs->idt, indent + 2);
213
214 fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
215 "cr3: 0x%.16llx cr4: 0x%.16llx\n",
216 indent, "",
217 sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
218 fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
219 "apic_base: 0x%.16llx\n",
220 indent, "",
221 sregs->cr8, sregs->efer, sregs->apic_base);
222
223 fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
224 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
225 fprintf(stream, "%*s%.16llx\n", indent + 2, "",
226 sregs->interrupt_bitmap[i]);
227 }
228}
229
230void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
231{
232 int rc;
233
234 TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
235 "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
236
237 /* If needed, create page map l4 table. */
238 if (!vm->pgd_created) {
239 vm_paddr_t paddr = vm_phy_page_alloc(vm,
240 KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
241 vm->pgd = paddr;
242
243 /* Set pointer to pgd tables in all the VCPUs that
244 * have already been created. Future VCPUs will have
245 * the value set as each one is created.
246 */
247 for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
248 vcpu = vcpu->next) {
249 struct kvm_sregs sregs;
250
251 /* Obtain the current system register settings */
252 vcpu_sregs_get(vm, vcpu->id, &sregs);
253
254 /* Set and store the pointer to the start of the
255 * pgd tables.
256 */
257 sregs.cr3 = vm->pgd;
258 vcpu_sregs_set(vm, vcpu->id, &sregs);
259 }
260
261 vm->pgd_created = true;
262 }
263}
264
265/* VM Virtual Page Map
266 *
267 * Input Args:
268 * vm - Virtual Machine
269 * vaddr - VM Virtual Address
270 * paddr - VM Physical Address
271 * pgd_memslot - Memory region slot for new virtual translation tables
272 *
273 * Output Args: None
274 *
275 * Return: None
276 *
277 * Within the VM given by vm, creates a virtual translation for the page
278 * starting at vaddr to the page starting at paddr.
279 */
280void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
281 uint32_t pgd_memslot)
282{
283 uint16_t index[4];
284 struct pageMapL4Entry *pml4e;
285
286 TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
287 "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
288
289 TEST_ASSERT((vaddr % vm->page_size) == 0,
290 "Virtual address not on page boundary,\n"
291 " vaddr: 0x%lx vm->page_size: 0x%x",
292 vaddr, vm->page_size);
293 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
294 (vaddr >> vm->page_shift)),
295 "Invalid virtual address, vaddr: 0x%lx",
296 vaddr);
297 TEST_ASSERT((paddr % vm->page_size) == 0,
298 "Physical address not on page boundary,\n"
299 " paddr: 0x%lx vm->page_size: 0x%x",
300 paddr, vm->page_size);
301 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
302		    "Physical address beyond maximum supported,\n"
303 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
304 paddr, vm->max_gfn, vm->page_size);
305
306 index[0] = (vaddr >> 12) & 0x1ffu;
307 index[1] = (vaddr >> 21) & 0x1ffu;
308 index[2] = (vaddr >> 30) & 0x1ffu;
309 index[3] = (vaddr >> 39) & 0x1ffu;
310
311 /* Allocate page directory pointer table if not present. */
312 pml4e = addr_gpa2hva(vm, vm->pgd);
313 if (!pml4e[index[3]].present) {
314 pml4e[index[3]].address = vm_phy_page_alloc(vm,
315 KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
316 >> vm->page_shift;
317 pml4e[index[3]].writable = true;
318 pml4e[index[3]].present = true;
319 }
320
321 /* Allocate page directory table if not present. */
322 struct pageDirectoryPointerEntry *pdpe;
323 pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
324 if (!pdpe[index[2]].present) {
325 pdpe[index[2]].address = vm_phy_page_alloc(vm,
326 KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
327 >> vm->page_shift;
328 pdpe[index[2]].writable = true;
329 pdpe[index[2]].present = true;
330 }
331
332 /* Allocate page table if not present. */
333 struct pageDirectoryEntry *pde;
334 pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
335 if (!pde[index[1]].present) {
336 pde[index[1]].address = vm_phy_page_alloc(vm,
337 KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
338 >> vm->page_shift;
339 pde[index[1]].writable = true;
340 pde[index[1]].present = true;
341 }
342
343 /* Fill in page table entry. */
344 struct pageTableEntry *pte;
345 pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
346 pte[index[0]].address = paddr >> vm->page_shift;
347 pte[index[0]].writable = true;
348 pte[index[0]].present = 1;
349}
350
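[Editorial note -- not part of the patch] The two helpers above, virt_pgd_alloc() and virt_pg_map(), are meant to be driven directly by tests. A minimal usage sketch, assuming a VM already created in VM_MODE_FLAT48PG; the memslot value 0 and the guest virtual address 0x400000 are illustrative assumptions only:

	/* Sketch: map one freshly allocated guest-physical page at GVA 0x400000. */
	virt_pgd_alloc(vm, 0);                        /* ensure the PML4 table exists */
	vm_paddr_t paddr = vm_phy_page_alloc(vm,
		KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);   /* grab a free guest-physical page */
	virt_pg_map(vm, 0x400000, paddr, 0);          /* GVA 0x400000 -> paddr */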
351/* Virtual Translation Tables Dump
352 *
353 * Input Args:
354 * vm - Virtual Machine
355 * indent - Left margin indent amount
356 *
357 * Output Args:
358 * stream - Output FILE stream
359 *
360 * Return: None
361 *
362 * Dumps to the FILE stream given by stream, the contents of all the
363 * virtual translation tables for the VM given by vm.
364 */
365void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
366{
367 struct pageMapL4Entry *pml4e, *pml4e_start;
368 struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
369 struct pageDirectoryEntry *pde, *pde_start;
370 struct pageTableEntry *pte, *pte_start;
371
372 if (!vm->pgd_created)
373 return;
374
375 fprintf(stream, "%*s "
376 " no\n", indent, "");
377 fprintf(stream, "%*s index hvaddr gpaddr "
378 "addr w exec dirty\n",
379 indent, "");
380 pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
381 vm->pgd);
382 for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
383 pml4e = &pml4e_start[n1];
384 if (!pml4e->present)
385 continue;
386 fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
387 " %u\n",
388 indent, "",
389 pml4e - pml4e_start, pml4e,
390 addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
391 pml4e->writable, pml4e->execute_disable);
392
393 pdpe_start = addr_gpa2hva(vm, pml4e->address
394 * vm->page_size);
395 for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
396 pdpe = &pdpe_start[n2];
397 if (!pdpe->present)
398 continue;
399 fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
400 "%u %u\n",
401 indent, "",
402 pdpe - pdpe_start, pdpe,
403 addr_hva2gpa(vm, pdpe),
404 (uint64_t) pdpe->address, pdpe->writable,
405 pdpe->execute_disable);
406
407 pde_start = addr_gpa2hva(vm,
408 pdpe->address * vm->page_size);
409 for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
410 pde = &pde_start[n3];
411 if (!pde->present)
412 continue;
413 fprintf(stream, "%*spde 0x%-3zx %p "
414 "0x%-12lx 0x%-10lx %u %u\n",
415 indent, "", pde - pde_start, pde,
416 addr_hva2gpa(vm, pde),
417 (uint64_t) pde->address, pde->writable,
418 pde->execute_disable);
419
420 pte_start = addr_gpa2hva(vm,
421 pde->address * vm->page_size);
422 for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
423 pte = &pte_start[n4];
424 if (!pte->present)
425 continue;
426 fprintf(stream, "%*spte 0x%-3zx %p "
427 "0x%-12lx 0x%-10lx %u %u "
428 " %u 0x%-10lx\n",
429 indent, "",
430 pte - pte_start, pte,
431 addr_hva2gpa(vm, pte),
432 (uint64_t) pte->address,
433 pte->writable,
434 pte->execute_disable,
435 pte->dirty,
436 ((uint64_t) n1 << 27)
437 | ((uint64_t) n2 << 18)
438 | ((uint64_t) n3 << 9)
439 | ((uint64_t) n4));
440 }
441 }
442 }
443 }
444}
445
446/* Set Unusable Segment
447 *
448 * Input Args: None
449 *
450 * Output Args:
451 * segp - Pointer to segment register
452 *
453 * Return: None
454 *
455 * Sets the segment register pointed to by segp to an unusable state.
456 */
457static void kvm_seg_set_unusable(struct kvm_segment *segp)
458{
459 memset(segp, 0, sizeof(*segp));
460 segp->unusable = true;
461}
462
463/* Set Long Mode Flat Kernel Code Segment
464 *
465 * Input Args:
466 * selector - selector value
467 *
468 * Output Args:
469 * segp - Pointer to KVM segment
470 *
471 * Return: None
472 *
473 * Sets up the KVM segment pointed to by segp, to be a code segment
474 * with the selector value given by selector.
475 */
476static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
477 struct kvm_segment *segp)
478{
479 memset(segp, 0, sizeof(*segp));
480 segp->selector = selector;
481 segp->limit = 0xFFFFFFFFu;
482 segp->s = 0x1; /* kTypeCodeData */
483 segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
484 * | kFlagCodeReadable
485 */
486 segp->g = true;
487 segp->l = true;
488 segp->present = 1;
489}
490
491/* Set Long Mode Flat Kernel Data Segment
492 *
493 * Input Args:
494 * selector - selector value
495 *
496 * Output Args:
497 * segp - Pointer to KVM segment
498 *
499 * Return: None
500 *
501 * Sets up the KVM segment pointed to by segp, to be a data segment
502 * with the selector value given by selector.
503 */
504static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
505 struct kvm_segment *segp)
506{
507 memset(segp, 0, sizeof(*segp));
508 segp->selector = selector;
509 segp->limit = 0xFFFFFFFFu;
510 segp->s = 0x1; /* kTypeCodeData */
511 segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
512 * | kFlagDataWritable
513 */
514 segp->g = true;
515 segp->present = true;
516}
517
518/* Address Guest Virtual to Guest Physical
519 *
520 * Input Args:
521 * vm - Virtual Machine
522 * gva - VM virtual address
523 *
524 * Output Args: None
525 *
526 * Return:
527 * Equivalent VM physical address
528 *
529 * Translates the VM virtual address given by gva to a VM physical
530 * address, by walking the virtual translation tables of the VM
531 * given by vm, and returns that VM physical address. A TEST_ASSERT
532 * failure occurs if gva is not mapped (i.e. if the walk reaches a
533 * non-present entry at any level of the VM's virtual translation
534 * tables).
535 */
536vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
537{
538 uint16_t index[4];
539 struct pageMapL4Entry *pml4e;
540 struct pageDirectoryPointerEntry *pdpe;
541 struct pageDirectoryEntry *pde;
542 struct pageTableEntry *pte;
543 void *hva;
544
545 TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
546 "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
547
548 index[0] = (gva >> 12) & 0x1ffu;
549 index[1] = (gva >> 21) & 0x1ffu;
550 index[2] = (gva >> 30) & 0x1ffu;
551 index[3] = (gva >> 39) & 0x1ffu;
552
553 if (!vm->pgd_created)
554 goto unmapped_gva;
555 pml4e = addr_gpa2hva(vm, vm->pgd);
556 if (!pml4e[index[3]].present)
557 goto unmapped_gva;
558
559 pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
560 if (!pdpe[index[2]].present)
561 goto unmapped_gva;
562
563 pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
564 if (!pde[index[1]].present)
565 goto unmapped_gva;
566
567 pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
568 if (!pte[index[0]].present)
569 goto unmapped_gva;
570
571 return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
572
573unmapped_gva:
574 TEST_ASSERT(false, "No mapping for vm virtual address, "
575 "gva: 0x%lx", gva);
576}
577
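[Editorial note -- not part of the patch] A sketch of how a test might combine this page-table walk with addr_gpa2hva() (used throughout this library) to reach the host memory backing a mapped guest virtual address. Here gva stands for an address previously mapped with virt_pg_map(), and the 4096-byte size assumes the default 4 KiB page:

	vm_paddr_t gpa = addr_gva2gpa(vm, gva);  /* walk the guest page tables */
	void *hva = addr_gpa2hva(vm, gpa);       /* host pointer to the same page */
	memset(hva, 0xaa, 4096);                 /* guest sees this data at gva */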
578void vcpu_setup(struct kvm_vm *vm, int vcpuid)
579{
580 struct kvm_sregs sregs;
581
582 /* Set mode specific system register values. */
583 vcpu_sregs_get(vm, vcpuid, &sregs);
584
585 switch (vm->mode) {
586 case VM_MODE_FLAT48PG:
587 sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
588 sregs.cr4 |= X86_CR4_PAE;
589 sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
590
591 kvm_seg_set_unusable(&sregs.ldt);
592 kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
593 kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
594 kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
595 break;
596
597 default:
598 TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
599 }
600 vcpu_sregs_set(vm, vcpuid, &sregs);
601
602	/* If the virtual translation tables have been set up, point the
603	 * system register (cr3) at them.  It's okay if they haven't been
604	 * set up yet: the code that sets up the virtual translation tables
605	 * will go back through any VCPUs that have already been created
606	 * and set their values.
607	 */
608 if (vm->pgd_created) {
609 struct kvm_sregs sregs;
610
611 vcpu_sregs_get(vm, vcpuid, &sregs);
612
613 sregs.cr3 = vm->pgd;
614 vcpu_sregs_set(vm, vcpuid, &sregs);
615 }
616}
617/* Adds a vCPU with reasonable defaults (i.e., a stack)
618 *
619 * Input Args:
620 * vcpuid - The id of the VCPU to add to the VM.
621 * guest_code - The vCPU's entry point
622 */
623void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
624{
625 struct kvm_mp_state mp_state;
626 struct kvm_regs regs;
627 vm_vaddr_t stack_vaddr;
628 stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
629 DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
630
631 /* Create VCPU */
632 vm_vcpu_add(vm, vcpuid);
633
634 /* Setup guest general purpose registers */
635 vcpu_regs_get(vm, vcpuid, &regs);
636 regs.rflags = regs.rflags | 0x2;
637 regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
638 regs.rip = (unsigned long) guest_code;
639 vcpu_regs_set(vm, vcpuid, &regs);
640
641 /* Setup the MP state */
642 mp_state.mp_state = 0;
643 vcpu_set_mp_state(vm, vcpuid, &mp_state);
644}
645
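[Editorial note -- not part of the patch] The guest_code argument simply becomes the vCPU's initial RIP, so callers usually pass an ordinary function compiled into the test binary. A minimal sketch; guest_main is a hypothetical test-supplied routine:

	static void guest_main(void)
	{
		for (;;)	/* spin; a real guest would signal completion, e.g. via port I/O */
			;
	}

	/* ... later, inside the test's main(): */
	vm_vcpu_add_default(vm, 0, guest_main);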
646/* VM VCPU CPUID Set
647 *
648 * Input Args:
649 * vm - Virtual Machine
650 * vcpuid - VCPU id
651 * cpuid - The CPUID values to set.
652 *
653 * Output Args: None
654 *
655 * Return: None
656 *
657 * Set the VCPU's CPUID.
658 */
659void vcpu_set_cpuid(struct kvm_vm *vm,
660 uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
661{
662 struct vcpu *vcpu = vcpu_find(vm, vcpuid);
663 int rc;
664
665 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
666
667 rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
668 TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
669 rc, errno);
670
671}
672/* Create a VM with reasonable defaults
673 *
674 * Input Args:
675 * vcpuid - The id of the single VCPU to add to the VM.
676 * guest_code - The vCPU's entry point
677 *
678 * Output Args: None
679 *
680 * Return:
681 * Pointer to opaque structure that describes the created VM.
682 */
683struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
684{
685 struct kvm_vm *vm;
686
687 /* Create VM */
688 vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
689
690 /* Setup IRQ Chip */
691 vm_create_irqchip(vm);
692
693 /* Add the first vCPU. */
694 vm_vcpu_add_default(vm, vcpuid, guest_code);
695
696 return vm;
697}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000000..090fd3f19352
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
1/*
2 * KVM_SET_SREGS tests
3 *
4 * Copyright (C) 2018, Google LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2.
7 *
8 * This is a regression test for the bug fixed by the following commit:
9 * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
10 *
11 * That bug allowed a user-mode program that called the KVM_SET_SREGS
12 * ioctl to put a VCPU's local APIC into an invalid state.
13 *
14 */
15#define _GNU_SOURCE /* for program_invocation_short_name */
16#include <fcntl.h>
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/ioctl.h>
21
22#include "test_util.h"
23
24#include "kvm_util.h"
25#include "x86.h"
26
27#define VCPU_ID 5
28
29int main(int argc, char *argv[])
30{
31 struct kvm_sregs sregs;
32 struct kvm_vm *vm;
33 int rc;
34
35 /* Tell stdout not to buffer its content */
36 setbuf(stdout, NULL);
37
38 /* Create VM */
39 vm = vm_create_default(VCPU_ID, NULL);
40
41 vcpu_sregs_get(vm, VCPU_ID, &sregs);
42 sregs.apic_base = 1 << 10;
43 rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
44 TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
45 sregs.apic_base);
46 sregs.apic_base = 1 << 11;
47 rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
48 TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
49 sregs.apic_base);
50
51 kvm_vm_free(vm);
52
53 return 0;
54}