diff options
author | Vitaly Kuznetsov <vkuznets@redhat.com> | 2014-08-08 17:22:05 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-08 18:57:23 -0400 |
commit | 0692dedcf64bf3cdcfb9f6a51c70d49c8db351d2 (patch) | |
tree | bddf505939396658f60fd594f9281456ac4eb9e1 /fs/proc | |
parent | 33144e8429bd7fceacbb869a7f5061db42e13fe6 (diff) |
fs/proc/vmcore.c:mmap_vmcore: skip non-ram pages reported by hypervisors
We have a special check in the read_vmcore() handler to check whether the page
was reported as ram or not by the hypervisor (pfn_is_ram()). However, when
vmcore is read with mmap() no such check is performed. That can lead to
unpredictable results, e.g. when running a Xen PVHVM guest, memcpy() after
mmap() on /proc/vmcore will hang while processing HVMMEM_mmio_dm pages,
creating enormous load in both DomU and Dom0.
Fix the issue by mapping each non-ram page to the zero page. Keep the direct
path with remap_oldmem_pfn_range() to avoid looping through all pages on
bare metal.
The issue can also be solved by overriding remap_oldmem_pfn_range() in
Xen-specific code, which is what remap_oldmem_pfn_range() was designed for.
That, however, would involve a non-obvious Xen code path for all x86 builds
with CONFIG_XEN_PVHVM=y and would prevent all other hypervisor-specific
code on the x86 arch from doing the same override.
[fengguang.wu@intel.com: remap_oldmem_pfn_checked() can be static]
[akpm@linux-foundation.org: clean up layout]
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/vmcore.c | 82 |
1 files changed, 79 insertions, 3 deletions
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 382aa890e228..a90d6d354199 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz) | |||
328 | * virtually contiguous user-space in ELF layout. | 328 | * virtually contiguous user-space in ELF layout. |
329 | */ | 329 | */ |
330 | #ifdef CONFIG_MMU | 330 | #ifdef CONFIG_MMU |
331 | /* | ||
332 | * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range, replacing every page | ||
333 | * for which pfn_is_ram() returns false with the zero page. | ||
334 | * | ||
335 | * @vma: vm_area_struct describing requested mapping | ||
336 | * @from: address to start remapping from; also the start of the range unmapped on failure | ||
337 | * @pfn: page frame number to start remapping to | ||
338 | * @size: remapping size in bytes (shifted by PAGE_SHIFT to get the page count) | ||
339 | * @prot: protection bits | ||
340 | * | ||
341 | * Returns zero on success; on failure, unmaps the len bytes mapped so far and returns -EAGAIN. | ||
342 | */ | ||
343 | static int remap_oldmem_pfn_checked(struct vm_area_struct *vma, | ||
344 | unsigned long from, unsigned long pfn, | ||
345 | unsigned long size, pgprot_t prot) | ||
346 | { | ||
347 | unsigned long map_size; | ||
348 | unsigned long pos_start, pos_end, pos; | ||
349 | unsigned long zeropage_pfn = my_zero_pfn(0); | ||
350 | size_t len = 0; | ||
351 | |||
352 | pos_start = pfn; | ||
353 | pos_end = pfn + (size >> PAGE_SHIFT); | ||
354 | |||
355 | for (pos = pos_start; pos < pos_end; ++pos) { | ||
356 | if (!pfn_is_ram(pos)) { | ||
357 | /* | ||
358 | * We hit a page which is not ram. Remap the contiguous | ||
359 | * region between pos_start and pos-1 and replace | ||
360 | * the non-ram page at pos with the zero page. | ||
361 | */ | ||
362 | if (pos > pos_start) { | ||
363 | /* Remap the contiguous run of ram pages seen so far */ | ||
364 | map_size = (pos - pos_start) << PAGE_SHIFT; | ||
365 | if (remap_oldmem_pfn_range(vma, from + len, | ||
366 | pos_start, map_size, | ||
367 | prot)) | ||
368 | goto fail; | ||
369 | len += map_size; | ||
370 | } | ||
371 | /* Map the zero page in place of the non-ram page */ | ||
372 | if (remap_oldmem_pfn_range(vma, from + len, | ||
373 | zeropage_pfn, | ||
374 | PAGE_SIZE, prot)) | ||
375 | goto fail; | ||
376 | len += PAGE_SIZE; | ||
377 | pos_start = pos + 1; /* the next run starts after this page */ | ||
378 | } | ||
379 | } | ||
380 | if (pos > pos_start) { | ||
381 | /* Remap the rest: the trailing run not yet mapped by the loop */ | ||
382 | map_size = (pos - pos_start) << PAGE_SHIFT; | ||
383 | if (remap_oldmem_pfn_range(vma, from + len, pos_start, | ||
384 | map_size, prot)) | ||
385 | goto fail; | ||
386 | } | ||
387 | return 0; | ||
388 | fail: | ||
389 | do_munmap(vma->vm_mm, from, len); /* undo the len bytes mapped so far */ | ||
390 | return -EAGAIN; | ||
391 | } | ||
392 | |||
393 | static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma, | ||
394 | unsigned long from, unsigned long pfn, | ||
395 | unsigned long size, pgprot_t prot) | ||
396 | { | ||
397 | /* | ||
398 | * Take the page-by-page checked path only if oldmem_pfn_is_ram was | ||
399 | * registered; otherwise remap the whole range in one call to avoid looping over all pages without a reason. | ||
400 | */ | ||
401 | if (oldmem_pfn_is_ram) | ||
402 | return remap_oldmem_pfn_checked(vma, from, pfn, size, prot); | ||
403 | else | ||
404 | return remap_oldmem_pfn_range(vma, from, pfn, size, prot); | ||
405 | } | ||
406 | |||
331 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | 407 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) |
332 | { | 408 | { |
333 | size_t size = vma->vm_end - vma->vm_start; | 409 | size_t size = vma->vm_end - vma->vm_start; |
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | |||
387 | 463 | ||
388 | tsz = min_t(size_t, m->offset + m->size - start, size); | 464 | tsz = min_t(size_t, m->offset + m->size - start, size); |
389 | paddr = m->paddr + start - m->offset; | 465 | paddr = m->paddr + start - m->offset; |
390 | if (remap_oldmem_pfn_range(vma, vma->vm_start + len, | 466 | if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len, |
391 | paddr >> PAGE_SHIFT, tsz, | 467 | paddr >> PAGE_SHIFT, tsz, |
392 | vma->vm_page_prot)) | 468 | vma->vm_page_prot)) |
393 | goto fail; | 469 | goto fail; |
394 | size -= tsz; | 470 | size -= tsz; |
395 | start += tsz; | 471 | start += tsz; |