diff options
author | David Vrabel <david.vrabel@citrix.com> | 2015-03-11 10:49:57 -0400 |
---|---|---|
committer | David Vrabel <david.vrabel@citrix.com> | 2015-03-16 10:49:15 -0400 |
commit | 4e8c0c8c4bf3a5b5c98046e146ab3884bf7a7d0e (patch) | |
tree | e2da5980fd405c7109a342b13f0a2a1214f94b61 /drivers/xen/privcmd.c | |
parent | 628c28eefd6f2cef03b212081b466ae43fd093a3 (diff) |
xen/privcmd: improve performance of MMAPBATCH_V2
Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
multiple frames at a time rather than one at a time, even when the
frames have non-consecutive GFNs.
xen_remap_domain_mfn_array() is added which maps an array of GFNs
(instead of a consecutive range of GFNs).
Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.
Migration times are significantly improved (when using a PV toolstack
domain). For example, for an idle 12 GiB PV guest:
          Before      After
    real  0m38.179s   0m26.868s
    user  0m15.096s   0m13.652s
    sys   0m28.988s   0m18.732s
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Diffstat (limited to 'drivers/xen/privcmd.c')
-rw-r--r-- | drivers/xen/privcmd.c | 117 |
1 files changed, 82 insertions, 35 deletions
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 59ac71c4a043..5a296161d843 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c | |||
@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size, | |||
159 | return ret; | 159 | return ret; |
160 | } | 160 | } |
161 | 161 | ||
162 | /* | ||
163 | * Similar to traverse_pages, but use each page as a "block" of | ||
164 | * data to be processed as one unit. | ||
165 | */ | ||
166 | static int traverse_pages_block(unsigned nelem, size_t size, | ||
167 | struct list_head *pos, | ||
168 | int (*fn)(void *data, int nr, void *state), | ||
169 | void *state) | ||
170 | { | ||
171 | void *pagedata; | ||
172 | unsigned pageidx; | ||
173 | int ret = 0; | ||
174 | |||
175 | BUG_ON(size > PAGE_SIZE); | ||
176 | |||
177 | pageidx = PAGE_SIZE; | ||
178 | |||
179 | while (nelem) { | ||
180 | int nr = (PAGE_SIZE/size); | ||
181 | struct page *page; | ||
182 | if (nr > nelem) | ||
183 | nr = nelem; | ||
184 | pos = pos->next; | ||
185 | page = list_entry(pos, struct page, lru); | ||
186 | pagedata = page_address(page); | ||
187 | ret = (*fn)(pagedata, nr, state); | ||
188 | if (ret) | ||
189 | break; | ||
190 | nelem -= nr; | ||
191 | } | ||
192 | |||
193 | return ret; | ||
194 | } | ||
195 | |||
162 | struct mmap_mfn_state { | 196 | struct mmap_mfn_state { |
163 | unsigned long va; | 197 | unsigned long va; |
164 | struct vm_area_struct *vma; | 198 | struct vm_area_struct *vma; |
@@ -274,39 +308,25 @@ struct mmap_batch_state { | |||
274 | /* auto translated dom0 note: if domU being created is PV, then mfn is | 308 | /* auto translated dom0 note: if domU being created is PV, then mfn is |
275 | * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). | 309 | * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). |
276 | */ | 310 | */ |
277 | static int mmap_batch_fn(void *data, void *state) | 311 | static int mmap_batch_fn(void *data, int nr, void *state) |
278 | { | 312 | { |
279 | xen_pfn_t *mfnp = data; | 313 | xen_pfn_t *mfnp = data; |
280 | struct mmap_batch_state *st = state; | 314 | struct mmap_batch_state *st = state; |
281 | struct vm_area_struct *vma = st->vma; | 315 | struct vm_area_struct *vma = st->vma; |
282 | struct page **pages = vma->vm_private_data; | 316 | struct page **pages = vma->vm_private_data; |
283 | struct page *cur_page = NULL; | 317 | struct page **cur_pages = NULL; |
284 | int ret; | 318 | int ret; |
285 | 319 | ||
286 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 320 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
287 | cur_page = pages[st->index++]; | 321 | cur_pages = &pages[st->index]; |
288 | 322 | ||
289 | ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, | 323 | BUG_ON(nr < 0); |
290 | st->vma->vm_page_prot, st->domain, | 324 | ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr, |
291 | &cur_page); | 325 | (int *)mfnp, st->vma->vm_page_prot, |
326 | st->domain, cur_pages); | ||
292 | 327 | ||
293 | /* Store error code for second pass. */ | 328 | /* Adjust the global_error? */ |
294 | if (st->version == 1) { | 329 | if (ret != nr) { |
295 | if (ret < 0) { | ||
296 | /* | ||
297 | * V1 encodes the error codes in the 32bit top nibble of the | ||
298 | * mfn (with its known limitations vis-a-vis 64 bit callers). | ||
299 | */ | ||
300 | *mfnp |= (ret == -ENOENT) ? | ||
301 | PRIVCMD_MMAPBATCH_PAGED_ERROR : | ||
302 | PRIVCMD_MMAPBATCH_MFN_ERROR; | ||
303 | } | ||
304 | } else { /* st->version == 2 */ | ||
305 | *((int *) mfnp) = ret; | ||
306 | } | ||
307 | |||
308 | /* And see if it affects the global_error. */ | ||
309 | if (ret < 0) { | ||
310 | if (ret == -ENOENT) | 330 | if (ret == -ENOENT) |
311 | st->global_error = -ENOENT; | 331 | st->global_error = -ENOENT; |
312 | else { | 332 | else { |
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state) | |||
315 | st->global_error = 1; | 335 | st->global_error = 1; |
316 | } | 336 | } |
317 | } | 337 | } |
318 | st->va += PAGE_SIZE; | 338 | st->va += PAGE_SIZE * nr; |
339 | st->index += nr; | ||
319 | 340 | ||
320 | return 0; | 341 | return 0; |
321 | } | 342 | } |
322 | 343 | ||
323 | static int mmap_return_errors(void *data, void *state) | 344 | static int mmap_return_error(int err, struct mmap_batch_state *st) |
324 | { | 345 | { |
325 | struct mmap_batch_state *st = state; | 346 | int ret; |
326 | 347 | ||
327 | if (st->version == 1) { | 348 | if (st->version == 1) { |
328 | xen_pfn_t mfnp = *((xen_pfn_t *) data); | 349 | if (err) { |
329 | if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR) | 350 | xen_pfn_t mfn; |
330 | return __put_user(mfnp, st->user_mfn++); | 351 | |
331 | else | 352 | ret = get_user(mfn, st->user_mfn); |
353 | if (ret < 0) | ||
354 | return ret; | ||
355 | /* | ||
356 | * V1 encodes the error codes in the 32bit top | ||
357 | * nibble of the mfn (with its known | ||
358 | * limitations vis-a-vis 64 bit callers). | ||
359 | */ | ||
360 | mfn |= (err == -ENOENT) ? | ||
361 | PRIVCMD_MMAPBATCH_PAGED_ERROR : | ||
362 | PRIVCMD_MMAPBATCH_MFN_ERROR; | ||
363 | return __put_user(mfn, st->user_mfn++); | ||
364 | } else | ||
332 | st->user_mfn++; | 365 | st->user_mfn++; |
333 | } else { /* st->version == 2 */ | 366 | } else { /* st->version == 2 */ |
334 | int err = *((int *) data); | ||
335 | if (err) | 367 | if (err) |
336 | return __put_user(err, st->user_err++); | 368 | return __put_user(err, st->user_err++); |
337 | else | 369 | else |
@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state) | |||
341 | return 0; | 373 | return 0; |
342 | } | 374 | } |
343 | 375 | ||
376 | static int mmap_return_errors(void *data, int nr, void *state) | ||
377 | { | ||
378 | struct mmap_batch_state *st = state; | ||
379 | int *errs = data; | ||
380 | int i; | ||
381 | int ret; | ||
382 | |||
383 | for (i = 0; i < nr; i++) { | ||
384 | ret = mmap_return_error(errs[i], st); | ||
385 | if (ret < 0) | ||
386 | return ret; | ||
387 | } | ||
388 | return 0; | ||
389 | } | ||
390 | |||
344 | /* Allocate pfns that are then mapped with gmfns from foreign domid. Update | 391 | /* Allocate pfns that are then mapped with gmfns from foreign domid. Update |
345 | * the vma with the page info to use later. | 392 | * the vma with the page info to use later. |
346 | * Returns: 0 if success, otherwise -errno | 393 | * Returns: 0 if success, otherwise -errno |
@@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) | |||
472 | state.version = version; | 519 | state.version = version; |
473 | 520 | ||
474 | /* mmap_batch_fn guarantees ret == 0 */ | 521 | /* mmap_batch_fn guarantees ret == 0 */ |
475 | BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), | 522 | BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), |
476 | &pagelist, mmap_batch_fn, &state)); | 523 | &pagelist, mmap_batch_fn, &state)); |
477 | 524 | ||
478 | up_write(&mm->mmap_sem); | 525 | up_write(&mm->mmap_sem); |
479 | 526 | ||
@@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) | |||
481 | /* Write back errors in second pass. */ | 528 | /* Write back errors in second pass. */ |
482 | state.user_mfn = (xen_pfn_t *)m.arr; | 529 | state.user_mfn = (xen_pfn_t *)m.arr; |
483 | state.user_err = m.err; | 530 | state.user_err = m.err; |
484 | ret = traverse_pages(m.num, sizeof(xen_pfn_t), | 531 | ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), |
485 | &pagelist, mmap_return_errors, &state); | 532 | &pagelist, mmap_return_errors, &state); |
486 | } else | 533 | } else |
487 | ret = 0; | 534 | ret = 0; |
488 | 535 | ||