author    Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>  2013-06-19 05:09:20 -0400
committer Gleb Natapov <gleb@redhat.com>  2013-06-27 07:20:42 -0400
commit    accaefe07ddbeb12c0de4cec1d62dba6a0ea1605
tree      9363694003f6263cc15b8fcf2c5351d065a2a0a4
parent    a8eca9dcc656a405a28ffba43f3d86a1ff0eb331

KVM: MMU: document clear_spte_count

Document it in Documentation/virtual/kvm/mmu.txt.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 Documentation/virtual/kvm/mmu.txt |  5 +++++
 arch/x86/include/asm/kvm_host.h   |  4 ++++
 arch/x86/kvm/mmu.c                | 17 ++++++++++++++---
 3 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index 869abcc48315..f514a3fad9b9 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -210,6 +210,11 @@ Shadow pages contain the following information:
     A bitmap indicating which sptes in spt point (directly or indirectly) at
     pages that may be unsynchronized.  Used to quickly locate all unsychronized
     pages reachable from a given page.
+  clear_spte_count:
+    Only present on 32-bit hosts, where a 64-bit spte cannot be written
+    atomically.  The reader uses this while running out of the MMU lock
+    to detect in-progress updates and retry them until the writer has
+    finished the write.
 
 Reverse map
 ===========
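
The constraint this new text describes (a 32-bit host has no atomic 64-bit
store) is what forces the spte to be handled as two halves. For illustration,
mmu.c defines a union along these lines under CONFIG_X86_32; the sketch below
uses stdint typedefs so it stands alone, and should be read as illustrative
rather than a verbatim copy of the kernel's code:

#include <stdint.h>

typedef uint32_t u32;
typedef uint64_t u64;

/*
 * On a 32-bit host a 64-bit spte can only be stored as two 32-bit
 * halves, so a concurrent lockless reader can observe a torn value.
 * The low half carries the present bit; the high half is ignored
 * whenever the spte is non-present.
 */
union split_spte {
	struct {
		u32 spte_low;
		u32 spte_high;
	};
	u64 spte;
};
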
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 966f2650b6ab..5d28c11d5e21 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -226,6 +226,10 @@ struct kvm_mmu_page {
 	DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
+	/*
+	 * Used out of the mmu-lock to avoid reading spte values while an
+	 * update is in progress; see the comments in __get_spte_lockless().
+	 */
 	int clear_spte_count;
 #endif
 
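
The field is only half of the protocol: the writer increments it whenever an
spte makes a present->non-present transition. Below is a minimal sketch of
that writer side, reusing the split_spte union from the sketch above; in
mmu.c this work is split across __update_clear_spte_slow() and a small
counting helper, and wmb() here is a stand-in for the kernel's smp_wmb():

/* Stand-in for the kernel's smp_wmb() write barrier. */
#define wmb()	__atomic_thread_fence(__ATOMIC_RELEASE)

/* Illustrative stand-in for the relevant part of struct kvm_mmu_page. */
struct kvm_mmu_page_sketch {
	int clear_spte_count;	/* counts present->non-present updates */
};

static void clear_spte_slow(struct kvm_mmu_page_sketch *sp,
			    union split_spte *sptep)
{
	/*
	 * Clear the low half (which holds the present bit) first, so a
	 * lockless reader never pairs a stale high half with a
	 * still-present low half.
	 */
	sptep->spte_low = 0;
	wmb();

	sptep->spte_high = 0;
	wmb();

	/* Publish the clear so readers can detect that they raced with it. */
	sp->clear_spte_count++;
}
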
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7113a0fb544c..f385a4cf4bfd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -466,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 /*
  * The idea using the light way get the spte on x86_32 guest is from
  * gup_get_pte(arch/x86/mm/gup.c).
- * The difference is we can not catch the spte tlb flush if we leave
- * guest mode, so we emulate it by increase clear_spte_count when spte
- * is cleared.
+ *
+ * An spte tlb flush may be pending, because kvm_set_pte_rmapp
+ * coalesces them and we are running out of the MMU lock.  Therefore
+ * we need to protect against in-progress updates of the spte.
+ *
+ * Reading the spte while an update is in progress may get the old value
+ * for the high part of the spte.  The race is fine for a present->non-present
+ * change (because the high part of the spte is ignored for non-present spte),
+ * but for a present->present change we must reread the spte.
+ *
+ * All such changes are done in two steps (present->non-present and
+ * non-present->present), hence it is enough to count the number of
+ * present->non-present updates: if it changed while reading the spte,
+ * we might have hit the race.  This is done using clear_spte_count.
  */
 static u64 __get_spte_lockless(u64 *sptep)
 {
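
The retry loop that the rewritten comment describes reads roughly as follows.
This is a sketch of the lockless reader rather than a verbatim copy of
__get_spte_lockless(); it reuses the types from the sketches above, and
rmb() stands in for the kernel's smp_rmb():

/* Stand-in for the kernel's smp_rmb() read barrier. */
#define rmb()	__atomic_thread_fence(__ATOMIC_ACQUIRE)

static u64 get_spte_lockless(struct kvm_mmu_page_sketch *sp,
			     union split_spte *orig)
{
	union split_spte spte;
	int count;

retry:
	count = sp->clear_spte_count;
	rmb();			/* read the counter before the halves */

	spte.spte_low = orig->spte_low;
	rmb();			/* low half before high half */

	spte.spte_high = orig->spte_high;
	rmb();			/* both halves before the recheck */

	/*
	 * If the low half moved or a clear completed meanwhile, the two
	 * halves may belong to different sptes: start over.
	 */
	if (spte.spte_low != orig->spte_low ||
	    count != sp->clear_spte_count)
		goto retry;

	return spte.spte;
}

A present->present change is performed as present->non-present followed by
non-present->present, so even if the low half ends up holding the same value,
the counter check above still catches the race.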