aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Baron <jbaron@redhat.com>2012-03-23 18:02:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-23 19:58:42 -0400
commit909af768e88867016f427264ae39d27a57b6a8ed (patch)
tree5068b4d98e4bedecde89d9113dc7ef8c69633f45
parent1cc684ab75123efe7ff446eb821d44375ba8fa30 (diff)
coredump: remove VM_ALWAYSDUMP flag
The motivation for this patchset was that I was looking at a way for a qemu-kvm process, to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc/<pid>/coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region. The qemu code which implements this features is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. I also believe that the 'MADV_DONTDUMP' flag might be useful for security sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(). Thus, freeing a valuable vma bit. Signed-off-by: Jason Baron <jbaron@redhat.com> Acked-by: Roland McGrath <roland@hack.frob.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/arm/kernel/process.c3
-rw-r--r--arch/hexagon/kernel/vdso.c3
-rw-r--r--arch/mips/kernel/vdso.c3
-rw-r--r--arch/powerpc/kernel/vdso.c10
-rw-r--r--arch/s390/kernel/vdso.c10
-rw-r--r--arch/sh/kernel/vsyscall/vsyscall.c3
-rw-r--r--arch/tile/mm/elf.c8
-rw-r--r--arch/unicore32/kernel/process.c2
-rw-r--r--arch/x86/um/mem_32.c8
-rw-r--r--arch/x86/um/vdso/vma.c3
-rw-r--r--arch/x86/vdso/vdso32-setup.c17
-rw-r--r--arch/x86/vdso/vma.c3
-rw-r--r--fs/binfmt_elf.c27
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/memory.c8
15 files changed, 40 insertions, 69 deletions
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index c2ae3cd331fe..219e4efee1a6 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -533,8 +533,7 @@ int vectors_user_mapping(void)
533 struct mm_struct *mm = current->mm; 533 struct mm_struct *mm = current->mm;
534 return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, 534 return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
535 VM_READ | VM_EXEC | 535 VM_READ | VM_EXEC |
536 VM_MAYREAD | VM_MAYEXEC | 536 VM_MAYREAD | VM_MAYEXEC | VM_RESERVED,
537 VM_ALWAYSDUMP | VM_RESERVED,
538 NULL); 537 NULL);
539} 538}
540 539
diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c
index 16277c33308a..f212a453b527 100644
--- a/arch/hexagon/kernel/vdso.c
+++ b/arch/hexagon/kernel/vdso.c
@@ -78,8 +78,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
78 /* MAYWRITE to allow gdb to COW and set breakpoints. */ 78 /* MAYWRITE to allow gdb to COW and set breakpoints. */
79 ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, 79 ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
80 VM_READ|VM_EXEC| 80 VM_READ|VM_EXEC|
81 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 81 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
82 VM_ALWAYSDUMP,
83 &vdso_page); 82 &vdso_page);
84 83
85 if (ret) 84 if (ret)
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index e5cdfd603f8f..0f1af58b036a 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -88,8 +88,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
88 88
89 ret = install_special_mapping(mm, addr, PAGE_SIZE, 89 ret = install_special_mapping(mm, addr, PAGE_SIZE,
90 VM_READ|VM_EXEC| 90 VM_READ|VM_EXEC|
91 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 91 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
92 VM_ALWAYSDUMP,
93 &vdso_page); 92 &vdso_page);
94 93
95 if (ret) 94 if (ret)
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7d14bb697d40..d36ee1055f88 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -263,17 +263,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
263 * the "data" page of the vDSO or you'll stop getting kernel updates 263 * the "data" page of the vDSO or you'll stop getting kernel updates
264 * and your nice userland gettimeofday will be totally dead. 264 * and your nice userland gettimeofday will be totally dead.
265 * It's fine to use that for setting breakpoints in the vDSO code 265 * It's fine to use that for setting breakpoints in the vDSO code
266 * pages though 266 * pages though.
267 *
268 * Make sure the vDSO gets into every core dump.
269 * Dumping its contents makes post-mortem fully interpretable later
270 * without matching up the same kernel and hardware config to see
271 * what PC values meant.
272 */ 267 */
273 rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, 268 rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
274 VM_READ|VM_EXEC| 269 VM_READ|VM_EXEC|
275 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 270 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
276 VM_ALWAYSDUMP,
277 vdso_pagelist); 271 vdso_pagelist);
278 if (rc) { 272 if (rc) {
279 current->mm->context.vdso_base = 0; 273 current->mm->context.vdso_base = 0;
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index e704a9965f90..9c80138206b0 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -241,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
241 * on the "data" page of the vDSO or you'll stop getting kernel 241 * on the "data" page of the vDSO or you'll stop getting kernel
242 * updates and your nice userland gettimeofday will be totally dead. 242 * updates and your nice userland gettimeofday will be totally dead.
243 * It's fine to use that for setting breakpoints in the vDSO code 243 * It's fine to use that for setting breakpoints in the vDSO code
244 * pages though 244 * pages though.
245 *
246 * Make sure the vDSO gets into every core dump.
247 * Dumping its contents makes post-mortem fully interpretable later
248 * without matching up the same kernel and hardware config to see
249 * what PC values meant.
250 */ 245 */
251 rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, 246 rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
252 VM_READ|VM_EXEC| 247 VM_READ|VM_EXEC|
253 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 248 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
254 VM_ALWAYSDUMP,
255 vdso_pagelist); 249 vdso_pagelist);
256 if (rc) 250 if (rc)
257 current->mm->context.vdso_base = 0; 251 current->mm->context.vdso_base = 0;
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 1d6d51a1ce79..5ca579720a09 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -73,8 +73,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
73 73
74 ret = install_special_mapping(mm, addr, PAGE_SIZE, 74 ret = install_special_mapping(mm, addr, PAGE_SIZE,
75 VM_READ | VM_EXEC | 75 VM_READ | VM_EXEC |
76 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | 76 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
77 VM_ALWAYSDUMP,
78 syscall_pages); 77 syscall_pages);
79 if (unlikely(ret)) 78 if (unlikely(ret))
80 goto up_fail; 79 goto up_fail;
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 55e58e93bfc5..1a00fb64fc88 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -117,17 +117,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
117 117
118 /* 118 /*
119 * MAYWRITE to allow gdb to COW and set breakpoints 119 * MAYWRITE to allow gdb to COW and set breakpoints
120 *
121 * Make sure the vDSO gets into every core dump. Dumping its
122 * contents makes post-mortem fully interpretable later
123 * without matching up the same kernel and hardware config to
124 * see what PC values meant.
125 */ 120 */
126 vdso_base = VDSO_BASE; 121 vdso_base = VDSO_BASE;
127 retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, 122 retval = install_special_mapping(mm, vdso_base, PAGE_SIZE,
128 VM_READ|VM_EXEC| 123 VM_READ|VM_EXEC|
129 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 124 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
130 VM_ALWAYSDUMP,
131 vdso_pages); 125 vdso_pages);
132 126
133#ifndef __tilegx__ 127#ifndef __tilegx__
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index 52edc2b62873..432b4291f37b 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -381,7 +381,7 @@ int vectors_user_mapping(void)
381 return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, 381 return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
382 VM_READ | VM_EXEC | 382 VM_READ | VM_EXEC |
383 VM_MAYREAD | VM_MAYEXEC | 383 VM_MAYREAD | VM_MAYEXEC |
384 VM_ALWAYSDUMP | VM_RESERVED, 384 VM_RESERVED,
385 NULL); 385 NULL);
386} 386}
387 387
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 639900a6fde9..f40281e5d6a2 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -23,14 +23,6 @@ static int __init gate_vma_init(void)
23 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 23 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
24 gate_vma.vm_page_prot = __P101; 24 gate_vma.vm_page_prot = __P101;
25 25
26 /*
27 * Make sure the vDSO gets into every core dump.
28 * Dumping its contents makes post-mortem fully interpretable later
29 * without matching up the same kernel and hardware config to see
30 * what PC values meant.
31 */
32 gate_vma.vm_flags |= VM_ALWAYSDUMP;
33
34 return 0; 26 return 0;
35} 27}
36__initcall(gate_vma_init); 28__initcall(gate_vma_init);
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 91f4ec9a0a56..af91901babb8 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
64 64
65 err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE, 65 err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
66 VM_READ|VM_EXEC| 66 VM_READ|VM_EXEC|
67 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 67 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
68 VM_ALWAYSDUMP,
69 vdsop); 68 vdsop);
70 69
71 up_write(&mm->mmap_sem); 70 up_write(&mm->mmap_sem);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 468d591dde31..a944020fa859 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -250,13 +250,7 @@ static int __init gate_vma_init(void)
250 gate_vma.vm_end = FIXADDR_USER_END; 250 gate_vma.vm_end = FIXADDR_USER_END;
251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
252 gate_vma.vm_page_prot = __P101; 252 gate_vma.vm_page_prot = __P101;
253 /* 253
254 * Make sure the vDSO gets into every core dump.
255 * Dumping its contents makes post-mortem fully interpretable later
256 * without matching up the same kernel and hardware config to see
257 * what PC values meant.
258 */
259 gate_vma.vm_flags |= VM_ALWAYSDUMP;
260 return 0; 254 return 0;
261} 255}
262 256
@@ -343,17 +337,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
343 if (compat_uses_vma || !compat) { 337 if (compat_uses_vma || !compat) {
344 /* 338 /*
345 * MAYWRITE to allow gdb to COW and set breakpoints 339 * MAYWRITE to allow gdb to COW and set breakpoints
346 *
347 * Make sure the vDSO gets into every core dump.
348 * Dumping its contents makes post-mortem fully
349 * interpretable later without matching up the same
350 * kernel and hardware config to see what PC values
351 * meant.
352 */ 340 */
353 ret = install_special_mapping(mm, addr, PAGE_SIZE, 341 ret = install_special_mapping(mm, addr, PAGE_SIZE,
354 VM_READ|VM_EXEC| 342 VM_READ|VM_EXEC|
355 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 343 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
356 VM_ALWAYSDUMP,
357 vdso32_pages); 344 vdso32_pages);
358 345
359 if (ret) 346 if (ret)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 153407c35b75..17e18279649f 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -124,8 +124,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
124 124
125 ret = install_special_mapping(mm, addr, vdso_size, 125 ret = install_special_mapping(mm, addr, vdso_size,
126 VM_READ|VM_EXEC| 126 VM_READ|VM_EXEC|
127 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 127 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
128 VM_ALWAYSDUMP,
129 vdso_pages); 128 vdso_pages);
130 if (ret) { 129 if (ret) {
131 current->mm->context.vdso = NULL; 130 current->mm->context.vdso = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 81878b78c9d4..b64be5b5ac21 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1093,6 +1093,29 @@ out:
1093 */ 1093 */
1094 1094
1095/* 1095/*
1096 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1097 * that are useful for post-mortem analysis are included in every core dump.
1098 * In that way we ensure that the core dump is fully interpretable later
1099 * without matching up the same kernel and hardware config to see what PC values
1100 * meant. These special mappings include - vDSO, vsyscall, and other
1101 * architecture specific mappings
1102 */
1103static bool always_dump_vma(struct vm_area_struct *vma)
1104{
1105 /* Any vsyscall mappings? */
1106 if (vma == get_gate_vma(vma->vm_mm))
1107 return true;
1108 /*
1109 * arch_vma_name() returns non-NULL for special architecture mappings,
1110 * such as vDSO sections.
1111 */
1112 if (arch_vma_name(vma))
1113 return true;
1114
1115 return false;
1116}
1117
1118/*
1096 * Decide what to dump of a segment, part, all or none. 1119 * Decide what to dump of a segment, part, all or none.
1097 */ 1120 */
1098static unsigned long vma_dump_size(struct vm_area_struct *vma, 1121static unsigned long vma_dump_size(struct vm_area_struct *vma,
@@ -1100,8 +1123,8 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1100{ 1123{
1101#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) 1124#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1102 1125
1103 /* The vma can be set up to tell us the answer directly. */ 1126 /* always dump the vdso and vsyscall sections */
1104 if (vma->vm_flags & VM_ALWAYSDUMP) 1127 if (always_dump_vma(vma))
1105 goto whole; 1128 goto whole;
1106 1129
1107 /* Hugetlb memory check */ 1130 /* Hugetlb memory check */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7330742e7973..2de2ddba51d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -111,7 +111,6 @@ extern unsigned int kobjsize(const void *objp);
111#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ 111#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */
112#endif 112#endif
113#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ 113#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
114#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
115 114
116#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ 115#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
117#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ 116#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
diff --git a/mm/memory.c b/mm/memory.c
index 3416b6e018d6..6105f475fa86 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3623,13 +3623,7 @@ static int __init gate_vma_init(void)
3623 gate_vma.vm_end = FIXADDR_USER_END; 3623 gate_vma.vm_end = FIXADDR_USER_END;
3624 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; 3624 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
3625 gate_vma.vm_page_prot = __P101; 3625 gate_vma.vm_page_prot = __P101;
3626 /* 3626
3627 * Make sure the vDSO gets into every core dump.
3628 * Dumping its contents makes post-mortem fully interpretable later
3629 * without matching up the same kernel and hardware config to see
3630 * what PC values meant.
3631 */
3632 gate_vma.vm_flags |= VM_ALWAYSDUMP;
3633 return 0; 3627 return 0;
3634} 3628}
3635__initcall(gate_vma_init); 3629__initcall(gate_vma_init);