aboutsummaryrefslogtreecommitdiffstats
path: root/virt/kvm/kvm_main.c
diff options
context:
space:
mode:
authorAndres Lagar-Cavilla <andreslc@google.com>2014-09-17 13:51:48 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2014-09-24 08:07:54 -0400
commit234b239bea395316d7f78018c672f4a88b3cdf0d (patch)
tree3defdf8536d0b73b1130d23fc6c79343a9831d59 /virt/kvm/kvm_main.c
parentb4619660635732bd2da376bb8f31f94d0f15fc98 (diff)
kvm: Faults which trigger IO release the mmap_sem
When KVM handles a tdp fault it uses FOLL_NOWAIT. If the guest memory has been swapped out or is behind a filemap, this will trigger async readahead and return immediately. The rationale is that KVM will kick back the guest with an "async page fault" and allow for some other guest process to take over. If async PFs are enabled the fault is retried asap from an async workqueue. If not, it's retried immediately in the same code path. In either case the retry will not relinquish the mmap semaphore and will block on the IO. This is a bad thing, as other mmap semaphore users now stall as a function of swap or filemap latency. This patch ensures both the regular and async PF path re-enter the fault allowing for the mmap semaphore to be relinquished in the case of IO wait. Reviewed-by: Radim Krčmář <rkrcmar@redhat.com> Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--virt/kvm/kvm_main.c49
1 files changed, 46 insertions, 3 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 499db0977f3c..1c6e8476b244 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1122,6 +1122,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
1122 return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); 1122 return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
1123} 1123}
1124 1124
1125int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
1126 unsigned long addr, bool write_fault,
1127 struct page **pagep)
1128{
1129 int npages;
1130 int locked = 1;
1131 int flags = FOLL_TOUCH | FOLL_HWPOISON |
1132 (pagep ? FOLL_GET : 0) |
1133 (write_fault ? FOLL_WRITE : 0);
1134
1135 /*
1136 * If retrying the fault, we get here *not* having allowed the filemap
1137 * to wait on the page lock. We should now allow waiting on the IO with
1138 * the mmap semaphore released.
1139 */
1140 down_read(&mm->mmap_sem);
1141 npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
1142 &locked);
1143 if (!locked) {
1144 VM_BUG_ON(npages != -EBUSY);
1145
1146 if (!pagep)
1147 return 0;
1148
1149 /*
1150 * The previous call has now waited on the IO. Now we can
1151 * retry and complete. Pass TRIED to ensure we do not re
1152 * schedule async IO (see e.g. filemap_fault).
1153 */
1154 down_read(&mm->mmap_sem);
1155 npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
1156 pagep, NULL, NULL);
1157 }
1158 up_read(&mm->mmap_sem);
1159 return npages;
1160}
1161
1125static inline int check_user_page_hwpoison(unsigned long addr) 1162static inline int check_user_page_hwpoison(unsigned long addr)
1126{ 1163{
1127 int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; 1164 int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1184,9 +1221,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
1184 npages = get_user_page_nowait(current, current->mm, 1221 npages = get_user_page_nowait(current, current->mm,
1185 addr, write_fault, page); 1222 addr, write_fault, page);
1186 up_read(&current->mm->mmap_sem); 1223 up_read(&current->mm->mmap_sem);
1187 } else 1224 } else {
1188 npages = get_user_pages_fast(addr, 1, write_fault, 1225 /*
1189 page); 1226 * By now we have tried gup_fast, and possibly async_pf, and we
1227 * are certainly not atomic. Time to retry the gup, allowing
1228 * mmap semaphore to be relinquished in the case of IO.
1229 */
1230 npages = kvm_get_user_page_io(current, current->mm, addr,
1231 write_fault, page);
1232 }
1190 if (npages != 1) 1233 if (npages != 1)
1191 return npages; 1234 return npages;
1192 1235