diff options
author | Avi Kivity <avi@qumranet.com> | 2007-10-17 06:18:47 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-01-30 10:52:53 -0500 |
commit | 42bf3f0a1f5a25be26e6bb53162cdee82410310d (patch) | |
tree | f134c24823081e6461f02f396d664e2f43e36486 /drivers/kvm | |
parent | d77a25074a8f845401f0eb786ebb8996e45d9e22 (diff) |
KVM: MMU: Simplify page table walker
Simplify the walker level loop so that it does not carry so much state from
one iteration to the next. In addition to being complex, carrying that state
made kmap_atomic() critical sections difficult to manage.
As a result of this change, kmap_atomic() sections are limited to actually
touching the guest pte, which allows the other functions called from the
walker to perform operations that may sleep. Such sleeping will occur once we enable swapping.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm')
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 124 |
1 files changed, 48 insertions, 76 deletions
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index a9e687b5c1e9..bab1b7f8d705 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h | |||
@@ -59,32 +59,12 @@ | |||
59 | struct guest_walker { | 59 | struct guest_walker { |
60 | int level; | 60 | int level; |
61 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; | 61 | gfn_t table_gfn[PT_MAX_FULL_LEVELS]; |
62 | pt_element_t *table; | ||
63 | pt_element_t pte; | 62 | pt_element_t pte; |
64 | pt_element_t *ptep; | ||
65 | struct page *page; | ||
66 | int index; | ||
67 | pt_element_t inherited_ar; | 63 | pt_element_t inherited_ar; |
68 | gfn_t gfn; | 64 | gfn_t gfn; |
69 | u32 error_code; | 65 | u32 error_code; |
70 | }; | 66 | }; |
71 | 67 | ||
72 | static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu, | ||
73 | int write_fault, | ||
74 | pt_element_t *ptep, | ||
75 | gfn_t table_gfn) | ||
76 | { | ||
77 | gpa_t pte_gpa; | ||
78 | |||
79 | if (write_fault && !is_dirty_pte(*ptep)) { | ||
80 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
81 | *ptep |= PT_DIRTY_MASK; | ||
82 | pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT); | ||
83 | pte_gpa += offset_in_page(ptep); | ||
84 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep)); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /* | 68 | /* |
89 | * Fetch a guest pte for a guest virtual address | 69 | * Fetch a guest pte for a guest virtual address |
90 | */ | 70 | */ |
@@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
94 | { | 74 | { |
95 | hpa_t hpa; | 75 | hpa_t hpa; |
96 | struct kvm_memory_slot *slot; | 76 | struct kvm_memory_slot *slot; |
97 | pt_element_t *ptep; | 77 | struct page *page; |
98 | pt_element_t root; | 78 | pt_element_t *table; |
79 | pt_element_t pte; | ||
99 | gfn_t table_gfn; | 80 | gfn_t table_gfn; |
81 | unsigned index; | ||
82 | gpa_t pte_gpa; | ||
100 | 83 | ||
101 | pgprintk("%s: addr %lx\n", __FUNCTION__, addr); | 84 | pgprintk("%s: addr %lx\n", __FUNCTION__, addr); |
102 | walker->level = vcpu->mmu.root_level; | 85 | walker->level = vcpu->mmu.root_level; |
103 | walker->table = NULL; | 86 | pte = vcpu->cr3; |
104 | walker->page = NULL; | ||
105 | walker->ptep = NULL; | ||
106 | root = vcpu->cr3; | ||
107 | #if PTTYPE == 64 | 87 | #if PTTYPE == 64 |
108 | if (!is_long_mode(vcpu)) { | 88 | if (!is_long_mode(vcpu)) { |
109 | walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; | 89 | pte = vcpu->pdptrs[(addr >> 30) & 3]; |
110 | root = *walker->ptep; | 90 | if (!is_present_pte(pte)) |
111 | walker->pte = root; | ||
112 | if (!(root & PT_PRESENT_MASK)) | ||
113 | goto not_present; | 91 | goto not_present; |
114 | --walker->level; | 92 | --walker->level; |
115 | } | 93 | } |
116 | #endif | 94 | #endif |
117 | table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | ||
118 | walker->table_gfn[walker->level - 1] = table_gfn; | ||
119 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, | ||
120 | walker->level - 1, table_gfn); | ||
121 | slot = gfn_to_memslot(vcpu->kvm, table_gfn); | ||
122 | hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK); | ||
123 | walker->page = pfn_to_page(hpa >> PAGE_SHIFT); | ||
124 | walker->table = kmap_atomic(walker->page, KM_USER0); | ||
125 | |||
126 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 95 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
127 | (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); | 96 | (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); |
128 | 97 | ||
129 | walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; | 98 | walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; |
130 | 99 | ||
131 | for (;;) { | 100 | for (;;) { |
132 | int index = PT_INDEX(addr, walker->level); | 101 | index = PT_INDEX(addr, walker->level); |
133 | hpa_t paddr; | ||
134 | 102 | ||
135 | ptep = &walker->table[index]; | 103 | table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
136 | walker->index = index; | 104 | walker->table_gfn[walker->level - 1] = table_gfn; |
137 | ASSERT(((unsigned long)walker->table & PAGE_MASK) == | 105 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, |
138 | ((unsigned long)ptep & PAGE_MASK)); | 106 | walker->level - 1, table_gfn); |
107 | |||
108 | slot = gfn_to_memslot(vcpu->kvm, table_gfn); | ||
109 | hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK); | ||
110 | page = pfn_to_page(hpa >> PAGE_SHIFT); | ||
139 | 111 | ||
140 | if (!is_present_pte(*ptep)) | 112 | table = kmap_atomic(page, KM_USER0); |
113 | pte = table[index]; | ||
114 | kunmap_atomic(table, KM_USER0); | ||
115 | |||
116 | if (!is_present_pte(pte)) | ||
141 | goto not_present; | 117 | goto not_present; |
142 | 118 | ||
143 | if (write_fault && !is_writeble_pte(*ptep)) | 119 | if (write_fault && !is_writeble_pte(pte)) |
144 | if (user_fault || is_write_protection(vcpu)) | 120 | if (user_fault || is_write_protection(vcpu)) |
145 | goto access_error; | 121 | goto access_error; |
146 | 122 | ||
147 | if (user_fault && !(*ptep & PT_USER_MASK)) | 123 | if (user_fault && !(pte & PT_USER_MASK)) |
148 | goto access_error; | 124 | goto access_error; |
149 | 125 | ||
150 | #if PTTYPE == 64 | 126 | #if PTTYPE == 64 |
151 | if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK)) | 127 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) |
152 | goto access_error; | 128 | goto access_error; |
153 | #endif | 129 | #endif |
154 | 130 | ||
155 | if (!(*ptep & PT_ACCESSED_MASK)) { | 131 | if (!(pte & PT_ACCESSED_MASK)) { |
156 | mark_page_dirty(vcpu->kvm, table_gfn); | 132 | mark_page_dirty(vcpu->kvm, table_gfn); |
157 | *ptep |= PT_ACCESSED_MASK; | 133 | pte |= PT_ACCESSED_MASK; |
134 | table = kmap_atomic(page, KM_USER0); | ||
135 | table[index] = pte; | ||
136 | kunmap_atomic(table, KM_USER0); | ||
158 | } | 137 | } |
159 | 138 | ||
160 | if (walker->level == PT_PAGE_TABLE_LEVEL) { | 139 | if (walker->level == PT_PAGE_TABLE_LEVEL) { |
161 | walker->gfn = (*ptep & PT_BASE_ADDR_MASK) | 140 | walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; |
162 | >> PAGE_SHIFT; | ||
163 | FNAME(update_dirty_bit)(vcpu, write_fault, ptep, | ||
164 | table_gfn); | ||
165 | break; | 141 | break; |
166 | } | 142 | } |
167 | 143 | ||
168 | if (walker->level == PT_DIRECTORY_LEVEL | 144 | if (walker->level == PT_DIRECTORY_LEVEL |
169 | && (*ptep & PT_PAGE_SIZE_MASK) | 145 | && (pte & PT_PAGE_SIZE_MASK) |
170 | && (PTTYPE == 64 || is_pse(vcpu))) { | 146 | && (PTTYPE == 64 || is_pse(vcpu))) { |
171 | walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK) | 147 | walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK) |
172 | >> PAGE_SHIFT; | 148 | >> PAGE_SHIFT; |
173 | walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); | 149 | walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); |
174 | FNAME(update_dirty_bit)(vcpu, write_fault, ptep, | ||
175 | table_gfn); | ||
176 | break; | 150 | break; |
177 | } | 151 | } |
178 | 152 | ||
179 | walker->inherited_ar &= walker->table[index]; | 153 | walker->inherited_ar &= pte; |
180 | table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; | ||
181 | kunmap_atomic(walker->table, KM_USER0); | ||
182 | paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT); | ||
183 | walker->page = pfn_to_page(paddr >> PAGE_SHIFT); | ||
184 | walker->table = kmap_atomic(walker->page, KM_USER0); | ||
185 | --walker->level; | 154 | --walker->level; |
186 | walker->table_gfn[walker->level - 1] = table_gfn; | ||
187 | pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, | ||
188 | walker->level - 1, table_gfn); | ||
189 | } | 155 | } |
190 | walker->pte = *ptep; | 156 | |
191 | if (walker->page) | 157 | if (write_fault && !is_dirty_pte(pte)) { |
192 | walker->ptep = NULL; | 158 | mark_page_dirty(vcpu->kvm, table_gfn); |
193 | if (walker->table) | 159 | pte |= PT_DIRTY_MASK; |
194 | kunmap_atomic(walker->table, KM_USER0); | 160 | table = kmap_atomic(page, KM_USER0); |
195 | pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); | 161 | table[index] = pte; |
162 | kunmap_atomic(table, KM_USER0); | ||
163 | pte_gpa = table_gfn << PAGE_SHIFT; | ||
164 | pte_gpa += index * sizeof(pt_element_t); | ||
165 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); | ||
166 | } | ||
167 | |||
168 | walker->pte = pte; | ||
169 | pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte); | ||
196 | return 1; | 170 | return 1; |
197 | 171 | ||
198 | not_present: | 172 | not_present: |
@@ -209,8 +183,6 @@ err: | |||
209 | walker->error_code |= PFERR_USER_MASK; | 183 | walker->error_code |= PFERR_USER_MASK; |
210 | if (fetch_fault) | 184 | if (fetch_fault) |
211 | walker->error_code |= PFERR_FETCH_MASK; | 185 | walker->error_code |= PFERR_FETCH_MASK; |
212 | if (walker->table) | ||
213 | kunmap_atomic(walker->table, KM_USER0); | ||
214 | return 0; | 186 | return 0; |
215 | } | 187 | } |
216 | 188 | ||