aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Avi Kivity <avi@qumranet.com> 2007-10-17 06:18:47 -0400
committer Avi Kivity <avi@qumranet.com> 2008-01-30 10:52:53 -0500
commit 42bf3f0a1f5a25be26e6bb53162cdee82410310d (patch)
tree f134c24823081e6461f02f396d664e2f43e36486
parent d77a25074a8f845401f0eb786ebb8996e45d9e22 (diff)
KVM: MMU: Simplify page table walker
Simplify the walker level loop so that it does not carry so much information from one loop iteration to the next. In addition to being complex, this made kmap_atomic() critical sections difficult to manage.

As a result of this change, kmap_atomic() sections are limited to actually touching the guest pte, which allows the other functions called from the walker to perform operations that may sleep. This will happen when we enable swapping.

Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r-- drivers/kvm/paging_tmpl.h 124
1 files changed, 48 insertions, 76 deletions
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index a9e687b5c1e9..bab1b7f8d705 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -59,32 +59,12 @@
59struct guest_walker { 59struct guest_walker {
60 int level; 60 int level;
61 gfn_t table_gfn[PT_MAX_FULL_LEVELS]; 61 gfn_t table_gfn[PT_MAX_FULL_LEVELS];
62 pt_element_t *table;
63 pt_element_t pte; 62 pt_element_t pte;
64 pt_element_t *ptep;
65 struct page *page;
66 int index;
67 pt_element_t inherited_ar; 63 pt_element_t inherited_ar;
68 gfn_t gfn; 64 gfn_t gfn;
69 u32 error_code; 65 u32 error_code;
70}; 66};
71 67
72static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
73 int write_fault,
74 pt_element_t *ptep,
75 gfn_t table_gfn)
76{
77 gpa_t pte_gpa;
78
79 if (write_fault && !is_dirty_pte(*ptep)) {
80 mark_page_dirty(vcpu->kvm, table_gfn);
81 *ptep |= PT_DIRTY_MASK;
82 pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
83 pte_gpa += offset_in_page(ptep);
84 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
85 }
86}
87
88/* 68/*
89 * Fetch a guest pte for a guest virtual address 69 * Fetch a guest pte for a guest virtual address
90 */ 70 */
@@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
94{ 74{
95 hpa_t hpa; 75 hpa_t hpa;
96 struct kvm_memory_slot *slot; 76 struct kvm_memory_slot *slot;
97 pt_element_t *ptep; 77 struct page *page;
98 pt_element_t root; 78 pt_element_t *table;
79 pt_element_t pte;
99 gfn_t table_gfn; 80 gfn_t table_gfn;
81 unsigned index;
82 gpa_t pte_gpa;
100 83
101 pgprintk("%s: addr %lx\n", __FUNCTION__, addr); 84 pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
102 walker->level = vcpu->mmu.root_level; 85 walker->level = vcpu->mmu.root_level;
103 walker->table = NULL; 86 pte = vcpu->cr3;
104 walker->page = NULL;
105 walker->ptep = NULL;
106 root = vcpu->cr3;
107#if PTTYPE == 64 87#if PTTYPE == 64
108 if (!is_long_mode(vcpu)) { 88 if (!is_long_mode(vcpu)) {
109 walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; 89 pte = vcpu->pdptrs[(addr >> 30) & 3];
110 root = *walker->ptep; 90 if (!is_present_pte(pte))
111 walker->pte = root;
112 if (!(root & PT_PRESENT_MASK))
113 goto not_present; 91 goto not_present;
114 --walker->level; 92 --walker->level;
115 } 93 }
116#endif 94#endif
117 table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
118 walker->table_gfn[walker->level - 1] = table_gfn;
119 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
120 walker->level - 1, table_gfn);
121 slot = gfn_to_memslot(vcpu->kvm, table_gfn);
122 hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
123 walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
124 walker->table = kmap_atomic(walker->page, KM_USER0);
125
126 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || 95 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
127 (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); 96 (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
128 97
129 walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; 98 walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
130 99
131 for (;;) { 100 for (;;) {
132 int index = PT_INDEX(addr, walker->level); 101 index = PT_INDEX(addr, walker->level);
133 hpa_t paddr;
134 102
135 ptep = &walker->table[index]; 103 table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
136 walker->index = index; 104 walker->table_gfn[walker->level - 1] = table_gfn;
137 ASSERT(((unsigned long)walker->table & PAGE_MASK) == 105 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
138 ((unsigned long)ptep & PAGE_MASK)); 106 walker->level - 1, table_gfn);
107
108 slot = gfn_to_memslot(vcpu->kvm, table_gfn);
109 hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
110 page = pfn_to_page(hpa >> PAGE_SHIFT);
139 111
140 if (!is_present_pte(*ptep)) 112 table = kmap_atomic(page, KM_USER0);
113 pte = table[index];
114 kunmap_atomic(table, KM_USER0);
115
116 if (!is_present_pte(pte))
141 goto not_present; 117 goto not_present;
142 118
143 if (write_fault && !is_writeble_pte(*ptep)) 119 if (write_fault && !is_writeble_pte(pte))
144 if (user_fault || is_write_protection(vcpu)) 120 if (user_fault || is_write_protection(vcpu))
145 goto access_error; 121 goto access_error;
146 122
147 if (user_fault && !(*ptep & PT_USER_MASK)) 123 if (user_fault && !(pte & PT_USER_MASK))
148 goto access_error; 124 goto access_error;
149 125
150#if PTTYPE == 64 126#if PTTYPE == 64
151 if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK)) 127 if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
152 goto access_error; 128 goto access_error;
153#endif 129#endif
154 130
155 if (!(*ptep & PT_ACCESSED_MASK)) { 131 if (!(pte & PT_ACCESSED_MASK)) {
156 mark_page_dirty(vcpu->kvm, table_gfn); 132 mark_page_dirty(vcpu->kvm, table_gfn);
157 *ptep |= PT_ACCESSED_MASK; 133 pte |= PT_ACCESSED_MASK;
134 table = kmap_atomic(page, KM_USER0);
135 table[index] = pte;
136 kunmap_atomic(table, KM_USER0);
158 } 137 }
159 138
160 if (walker->level == PT_PAGE_TABLE_LEVEL) { 139 if (walker->level == PT_PAGE_TABLE_LEVEL) {
161 walker->gfn = (*ptep & PT_BASE_ADDR_MASK) 140 walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
162 >> PAGE_SHIFT;
163 FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
164 table_gfn);
165 break; 141 break;
166 } 142 }
167 143
168 if (walker->level == PT_DIRECTORY_LEVEL 144 if (walker->level == PT_DIRECTORY_LEVEL
169 && (*ptep & PT_PAGE_SIZE_MASK) 145 && (pte & PT_PAGE_SIZE_MASK)
170 && (PTTYPE == 64 || is_pse(vcpu))) { 146 && (PTTYPE == 64 || is_pse(vcpu))) {
171 walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK) 147 walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK)
172 >> PAGE_SHIFT; 148 >> PAGE_SHIFT;
173 walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL); 149 walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
174 FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
175 table_gfn);
176 break; 150 break;
177 } 151 }
178 152
179 walker->inherited_ar &= walker->table[index]; 153 walker->inherited_ar &= pte;
180 table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
181 kunmap_atomic(walker->table, KM_USER0);
182 paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT);
183 walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
184 walker->table = kmap_atomic(walker->page, KM_USER0);
185 --walker->level; 154 --walker->level;
186 walker->table_gfn[walker->level - 1] = table_gfn;
187 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
188 walker->level - 1, table_gfn);
189 } 155 }
190 walker->pte = *ptep; 156
191 if (walker->page) 157 if (write_fault && !is_dirty_pte(pte)) {
192 walker->ptep = NULL; 158 mark_page_dirty(vcpu->kvm, table_gfn);
193 if (walker->table) 159 pte |= PT_DIRTY_MASK;
194 kunmap_atomic(walker->table, KM_USER0); 160 table = kmap_atomic(page, KM_USER0);
195 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); 161 table[index] = pte;
162 kunmap_atomic(table, KM_USER0);
163 pte_gpa = table_gfn << PAGE_SHIFT;
164 pte_gpa += index * sizeof(pt_element_t);
165 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
166 }
167
168 walker->pte = pte;
169 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
196 return 1; 170 return 1;
197 171
198not_present: 172not_present:
@@ -209,8 +183,6 @@ err:
209 walker->error_code |= PFERR_USER_MASK; 183 walker->error_code |= PFERR_USER_MASK;
210 if (fetch_fault) 184 if (fetch_fault)
211 walker->error_code |= PFERR_FETCH_MASK; 185 walker->error_code |= PFERR_FETCH_MASK;
212 if (walker->table)
213 kunmap_atomic(walker->table, KM_USER0);
214 return 0; 186 return 0;
215} 187}
216 188