diff options
| author | Olaf Hering <olaf@aepfle.de> | 2011-05-26 19:25:54 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:37 -0400 |
| commit | 997c136f518c5debd63847e78e2a8694f56dcf90 (patch) | |
| tree | ac2ff48901be3e6834757675dcc177732e034a9f /fs/proc | |
| parent | 98bc93e505c03403479c6669c4ff97301cee6199 (diff) | |
fs/proc/vmcore.c: add hook to read_from_oldmem() to check for non-ram pages
The balloon driver in a Xen guest frees guest pages and marks them as
mmio. When the kernel crashes and the crash kernel attempts to read the
oldmem via /proc/vmcore, a read from ballooned pages will generate 100%
load in dom0 because Xen asks qemu-dm for the page content. Since the
reads come in as 8-byte requests, each ballooned page is tried 512 times.
With this change a hook can be registered which checks whether the given
pfn is really ram. The hook has to return a value > 0 for ram pages, a
value < 0 on error (because the hypercall is not known) and 0 for non-ram
pages.
This will reduce the time to read /proc/vmcore. Without this change, a
512M guest with a 128M crashkernel region needs 200 seconds to read it; with
this change it takes just 2 seconds.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/vmcore.c | 52 |
1 files changed, 49 insertions, 3 deletions
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 74802bc5ded..cd99bf55765 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
| @@ -35,6 +35,46 @@ static u64 vmcore_size; | |||
| 35 | 35 | ||
| 36 | static struct proc_dir_entry *proc_vmcore = NULL; | 36 | static struct proc_dir_entry *proc_vmcore = NULL; |
| 37 | 37 | ||
| 38 | /* | ||
| 39 | * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error | ||
| 40 | * The called function has to take care of module refcounting. | ||
| 41 | */ | ||
| 42 | static int (*oldmem_pfn_is_ram)(unsigned long pfn); | ||
| 43 | |||
| 44 | int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)) | ||
| 45 | { | ||
| 46 | if (oldmem_pfn_is_ram) | ||
| 47 | return -EBUSY; | ||
| 48 | oldmem_pfn_is_ram = fn; | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram); | ||
| 52 | |||
| 53 | void unregister_oldmem_pfn_is_ram(void) | ||
| 54 | { | ||
| 55 | oldmem_pfn_is_ram = NULL; | ||
| 56 | wmb(); | ||
| 57 | } | ||
| 58 | EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram); | ||
| 59 | |||
| 60 | static int pfn_is_ram(unsigned long pfn) | ||
| 61 | { | ||
| 62 | int (*fn)(unsigned long pfn); | ||
| 63 | /* pfn is ram unless fn() checks pagetype */ | ||
| 64 | int ret = 1; | ||
| 65 | |||
| 66 | /* | ||
| 67 | * Ask hypervisor if the pfn is really ram. | ||
| 68 | * A ballooned page contains no data and reading from such a page | ||
| 69 | * will cause high load in the hypervisor. | ||
| 70 | */ | ||
| 71 | fn = oldmem_pfn_is_ram; | ||
| 72 | if (fn) | ||
| 73 | ret = fn(pfn); | ||
| 74 | |||
| 75 | return ret; | ||
| 76 | } | ||
| 77 | |||
| 38 | /* Reads a page from the oldmem device from given offset. */ | 78 | /* Reads a page from the oldmem device from given offset. */ |
| 39 | static ssize_t read_from_oldmem(char *buf, size_t count, | 79 | static ssize_t read_from_oldmem(char *buf, size_t count, |
| 40 | u64 *ppos, int userbuf) | 80 | u64 *ppos, int userbuf) |
| @@ -55,9 +95,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count, | |||
| 55 | else | 95 | else |
| 56 | nr_bytes = count; | 96 | nr_bytes = count; |
| 57 | 97 | ||
| 58 | tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); | 98 | /* If pfn is not ram, return zeros for sparse dump files */ |
| 59 | if (tmp < 0) | 99 | if (pfn_is_ram(pfn) == 0) |
| 60 | return tmp; | 100 | memset(buf, 0, nr_bytes); |
| 101 | else { | ||
| 102 | tmp = copy_oldmem_page(pfn, buf, nr_bytes, | ||
| 103 | offset, userbuf); | ||
| 104 | if (tmp < 0) | ||
| 105 | return tmp; | ||
| 106 | } | ||
| 61 | *ppos += nr_bytes; | 107 | *ppos += nr_bytes; |
| 62 | count -= nr_bytes; | 108 | count -= nr_bytes; |
| 63 | buf += nr_bytes; | 109 | buf += nr_bytes; |
