diff options
author | Andres Lagar-Cavilla <andreslc@google.com> | 2014-09-17 13:51:48 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2014-09-24 08:07:54 -0400 |
commit | 234b239bea395316d7f78018c672f4a88b3cdf0d (patch) | |
tree | 3defdf8536d0b73b1130d23fc6c79343a9831d59 /virt/kvm/kvm_main.c | |
parent | b4619660635732bd2da376bb8f31f94d0f15fc98 (diff) |
kvm: Faults which trigger IO release the mmap_sem
When KVM handles a tdp fault it uses FOLL_NOWAIT. If the guest memory
has been swapped out or is behind a filemap, this will trigger async
readahead and return immediately. The rationale is that KVM will kick
back the guest with an "async page fault" and allow for some other
guest process to take over.
If async PFs are enabled the fault is retried asap from an async
workqueue. If not, it's retried immediately in the same code path. In
either case the retry will not relinquish the mmap semaphore and will
block on the IO. This is a bad thing, as other mmap semaphore users
now stall as a function of swap or filemap latency.
This patch ensures both the regular and async PF path re-enter the
fault allowing for the mmap semaphore to be relinquished in the case
of IO wait.
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 49 |
1 files changed, 46 insertions, 3 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 499db0977f3c..1c6e8476b244 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -1122,6 +1122,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | |||
1122 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); | 1122 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); |
1123 | } | 1123 | } |
1124 | 1124 | ||
1125 | int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, | ||
1126 | unsigned long addr, bool write_fault, | ||
1127 | struct page **pagep) | ||
1128 | { | ||
1129 | int npages; | ||
1130 | int locked = 1; | ||
1131 | int flags = FOLL_TOUCH | FOLL_HWPOISON | | ||
1132 | (pagep ? FOLL_GET : 0) | | ||
1133 | (write_fault ? FOLL_WRITE : 0); | ||
1134 | |||
1135 | /* | ||
1136 | * If retrying the fault, we get here *not* having allowed the filemap | ||
1137 | * to wait on the page lock. We should now allow waiting on the IO with | ||
1138 | * the mmap semaphore released. | ||
1139 | */ | ||
1140 | down_read(&mm->mmap_sem); | ||
1141 | npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL, | ||
1142 | &locked); | ||
1143 | if (!locked) { | ||
1144 | VM_BUG_ON(npages != -EBUSY); | ||
1145 | |||
1146 | if (!pagep) | ||
1147 | return 0; | ||
1148 | |||
1149 | /* | ||
1150 | * The previous call has now waited on the IO. Now we can | ||
1151 | * retry and complete. Pass TRIED to ensure we do not re | ||
1152 | * schedule async IO (see e.g. filemap_fault). | ||
1153 | */ | ||
1154 | down_read(&mm->mmap_sem); | ||
1155 | npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED, | ||
1156 | pagep, NULL, NULL); | ||
1157 | } | ||
1158 | up_read(&mm->mmap_sem); | ||
1159 | return npages; | ||
1160 | } | ||
1161 | |||
1125 | static inline int check_user_page_hwpoison(unsigned long addr) | 1162 | static inline int check_user_page_hwpoison(unsigned long addr) |
1126 | { | 1163 | { |
1127 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; | 1164 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; |
@@ -1184,9 +1221,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, | |||
1184 | npages = get_user_page_nowait(current, current->mm, | 1221 | npages = get_user_page_nowait(current, current->mm, |
1185 | addr, write_fault, page); | 1222 | addr, write_fault, page); |
1186 | up_read(&current->mm->mmap_sem); | 1223 | up_read(&current->mm->mmap_sem); |
1187 | } else | 1224 | } else { |
1188 | npages = get_user_pages_fast(addr, 1, write_fault, | 1225 | /* |
1189 | page); | 1226 | * By now we have tried gup_fast, and possibly async_pf, and we |
1227 | * are certainly not atomic. Time to retry the gup, allowing | ||
1228 | * mmap semaphore to be relinquished in the case of IO. | ||
1229 | */ | ||
1230 | npages = kvm_get_user_page_io(current, current->mm, addr, | ||
1231 | write_fault, page); | ||
1232 | } | ||
1190 | if (npages != 1) | 1233 | if (npages != 1) |
1191 | return npages; | 1234 | return npages; |
1192 | 1235 | ||