aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andy Lutomirski <luto@amacapital.net> 2014-05-19 18:58:33 -0400
committer H. Peter Anvin <hpa@linux.intel.com> 2014-05-20 14:38:42 -0400
commit a62c34bd2a8a3f159945becd57401e478818d51c (patch)
tree 8721aca251b468606e52376fc811dd0c8beeaeb8
parent 78d683e838a60ec4ba4591cca4364cba84a9e626 (diff)
x86, mm: Improve _install_special_mapping and fix x86 vdso naming

Using arch_vma_name to give special mappings a name is awkward. x86 currently implements it by comparing the start address of the vma to the expected address of the vdso. This requires tracking the start address of special mappings and is probably buggy if a special vma is split or moved.

Improve _install_special_mapping to just name the vma directly. Use it to give the x86 vvar area a name, which should make CRIU's life easier.

As a side effect, the vvar area will show up in core dumps. This could be considered weird and is fixable.

[hpa: I say we accept this as-is but be prepared to deal with knocking out the vvars from core dumps if this becomes a problem.]

Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/276b39b6b645fb11e345457b503f17b83c2c6fd0.1400538962.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

-rw-r--r-- arch/x86/include/asm/vdso.h | 6
-rw-r--r-- arch/x86/mm/init_64.c | 3
-rw-r--r-- arch/x86/vdso/vdso2c.h | 5
-rw-r--r-- arch/x86/vdso/vdso32-setup.c | 7
-rw-r--r-- arch/x86/vdso/vma.c | 25
-rw-r--r-- include/linux/mm.h | 4
-rw-r--r-- include/linux/mm_types.h | 6
-rw-r--r-- mm/mmap.c | 89
8 files changed, 94 insertions, 51 deletions
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index d0a2c909c72d..30be253dd283 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -7,10 +7,14 @@
7 7
8#ifndef __ASSEMBLER__ 8#ifndef __ASSEMBLER__
9 9
10#include <linux/mm_types.h>
11
10struct vdso_image { 12struct vdso_image {
11 void *data; 13 void *data;
12 unsigned long size; /* Always a multiple of PAGE_SIZE */ 14 unsigned long size; /* Always a multiple of PAGE_SIZE */
13 struct page **pages; /* Big enough for data/size page pointers */ 15
16 /* text_mapping.pages is big enough for data/size page pointers */
17 struct vm_special_mapping text_mapping;
14 18
15 unsigned long alt, alt_len; 19 unsigned long alt, alt_len;
16 20
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6f881842116c..9deb59b0baea 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1223,9 +1223,6 @@ int in_gate_area_no_mm(unsigned long addr)
1223 1223
1224const char *arch_vma_name(struct vm_area_struct *vma) 1224const char *arch_vma_name(struct vm_area_struct *vma)
1225{ 1225{
1226 if (vma->vm_mm && vma->vm_start ==
1227 (long __force)vma->vm_mm->context.vdso)
1228 return "[vdso]";
1229 if (vma == &gate_vma) 1226 if (vma == &gate_vma)
1230 return "[vsyscall]"; 1227 return "[vsyscall]";
1231 return NULL; 1228 return NULL;
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index ed2e894e89ab..3dcc61e796e9 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -136,7 +136,10 @@ static int GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
136 fprintf(outfile, "const struct vdso_image %s = {\n", name); 136 fprintf(outfile, "const struct vdso_image %s = {\n", name);
137 fprintf(outfile, "\t.data = raw_data,\n"); 137 fprintf(outfile, "\t.data = raw_data,\n");
138 fprintf(outfile, "\t.size = %lu,\n", data_size); 138 fprintf(outfile, "\t.size = %lu,\n", data_size);
139 fprintf(outfile, "\t.pages = pages,\n"); 139 fprintf(outfile, "\t.text_mapping = {\n");
140 fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
141 fprintf(outfile, "\t\t.pages = pages,\n");
142 fprintf(outfile, "\t},\n");
140 if (alt_sec) { 143 if (alt_sec) {
141 fprintf(outfile, "\t.alt = %lu,\n", 144 fprintf(outfile, "\t.alt = %lu,\n",
142 (unsigned long)alt_sec->sh_offset); 145 (unsigned long)alt_sec->sh_offset);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index c3ed708e50f4..e4f7781ee162 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -119,13 +119,6 @@ __initcall(ia32_binfmt_init);
119 119
120#else /* CONFIG_X86_32 */ 120#else /* CONFIG_X86_32 */
121 121
122const char *arch_vma_name(struct vm_area_struct *vma)
123{
124 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
125 return "[vdso]";
126 return NULL;
127}
128
129struct vm_area_struct *get_gate_vma(struct mm_struct *mm) 122struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
130{ 123{
131 return NULL; 124 return NULL;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 8ad0081df7a8..e1513c47872a 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -30,7 +30,8 @@ void __init init_vdso_image(const struct vdso_image *image)
30 30
31 BUG_ON(image->size % PAGE_SIZE != 0); 31 BUG_ON(image->size % PAGE_SIZE != 0);
32 for (i = 0; i < npages; i++) 32 for (i = 0; i < npages; i++)
33 image->pages[i] = virt_to_page(image->data + i*PAGE_SIZE); 33 image->text_mapping.pages[i] =
34 virt_to_page(image->data + i*PAGE_SIZE);
34 35
35 apply_alternatives((struct alt_instr *)(image->data + image->alt), 36 apply_alternatives((struct alt_instr *)(image->data + image->alt),
36 (struct alt_instr *)(image->data + image->alt + 37 (struct alt_instr *)(image->data + image->alt +
@@ -91,6 +92,10 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
91 unsigned long addr; 92 unsigned long addr;
92 int ret = 0; 93 int ret = 0;
93 static struct page *no_pages[] = {NULL}; 94 static struct page *no_pages[] = {NULL};
95 static struct vm_special_mapping vvar_mapping = {
96 .name = "[vvar]",
97 .pages = no_pages,
98 };
94 99
95 if (calculate_addr) { 100 if (calculate_addr) {
96 addr = vdso_addr(current->mm->start_stack, 101 addr = vdso_addr(current->mm->start_stack,
@@ -112,21 +117,23 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
112 /* 117 /*
113 * MAYWRITE to allow gdb to COW and set breakpoints 118 * MAYWRITE to allow gdb to COW and set breakpoints
114 */ 119 */
115 ret = install_special_mapping(mm, 120 vma = _install_special_mapping(mm,
116 addr, 121 addr,
117 image->size, 122 image->size,
118 VM_READ|VM_EXEC| 123 VM_READ|VM_EXEC|
119 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 124 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
120 image->pages); 125 &image->text_mapping);
121 126
122 if (ret) 127 if (IS_ERR(vma)) {
128 ret = PTR_ERR(vma);
123 goto up_fail; 129 goto up_fail;
130 }
124 131
125 vma = _install_special_mapping(mm, 132 vma = _install_special_mapping(mm,
126 addr + image->size, 133 addr + image->size,
127 image->sym_end_mapping - image->size, 134 image->sym_end_mapping - image->size,
128 VM_READ, 135 VM_READ,
129 no_pages); 136 &vvar_mapping);
130 137
131 if (IS_ERR(vma)) { 138 if (IS_ERR(vma)) {
132 ret = PTR_ERR(vma); 139 ret = PTR_ERR(vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63f8d4efe303..05aab09803e6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1782,7 +1782,9 @@ extern struct file *get_mm_exe_file(struct mm_struct *mm);
1782extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); 1782extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
1783extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, 1783extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
1784 unsigned long addr, unsigned long len, 1784 unsigned long addr, unsigned long len,
1785 unsigned long flags, struct page **pages); 1785 unsigned long flags,
1786 const struct vm_special_mapping *spec);
1787/* This is an obsolete alternative to _install_special_mapping. */
1786extern int install_special_mapping(struct mm_struct *mm, 1788extern int install_special_mapping(struct mm_struct *mm,
1787 unsigned long addr, unsigned long len, 1789 unsigned long addr, unsigned long len,
1788 unsigned long flags, struct page **pages); 1790 unsigned long flags, struct page **pages);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8967e20cbe57..22c6f4e16d10 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -510,4 +510,10 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
510} 510}
511#endif 511#endif
512 512
513struct vm_special_mapping
514{
515 const char *name;
516 struct page **pages;
517};
518
513#endif /* _LINUX_MM_TYPES_H */ 519#endif /* _LINUX_MM_TYPES_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index b1202cf81f4b..52bbc9514d9d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2872,6 +2872,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2872 return 1; 2872 return 1;
2873} 2873}
2874 2874
2875static int special_mapping_fault(struct vm_area_struct *vma,
2876 struct vm_fault *vmf);
2877
2878/*
2879 * Having a close hook prevents vma merging regardless of flags.
2880 */
2881static void special_mapping_close(struct vm_area_struct *vma)
2882{
2883}
2884
2885static const char *special_mapping_name(struct vm_area_struct *vma)
2886{
2887 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
2888}
2889
2890static const struct vm_operations_struct special_mapping_vmops = {
2891 .close = special_mapping_close,
2892 .fault = special_mapping_fault,
2893 .name = special_mapping_name,
2894};
2895
2896static const struct vm_operations_struct legacy_special_mapping_vmops = {
2897 .close = special_mapping_close,
2898 .fault = special_mapping_fault,
2899};
2875 2900
2876static int special_mapping_fault(struct vm_area_struct *vma, 2901static int special_mapping_fault(struct vm_area_struct *vma,
2877 struct vm_fault *vmf) 2902 struct vm_fault *vmf)
@@ -2887,7 +2912,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
2887 */ 2912 */
2888 pgoff = vmf->pgoff - vma->vm_pgoff; 2913 pgoff = vmf->pgoff - vma->vm_pgoff;
2889 2914
2890 for (pages = vma->vm_private_data; pgoff && *pages; ++pages) 2915 if (vma->vm_ops == &legacy_special_mapping_vmops)
2916 pages = vma->vm_private_data;
2917 else
2918 pages = ((struct vm_special_mapping *)vma->vm_private_data)->
2919 pages;
2920
2921 for (; pgoff && *pages; ++pages)
2891 pgoff--; 2922 pgoff--;
2892 2923
2893 if (*pages) { 2924 if (*pages) {
@@ -2900,30 +2931,11 @@ static int special_mapping_fault(struct vm_area_struct *vma,
2900 return VM_FAULT_SIGBUS; 2931 return VM_FAULT_SIGBUS;
2901} 2932}
2902 2933
2903/* 2934static struct vm_area_struct *__install_special_mapping(
2904 * Having a close hook prevents vma merging regardless of flags. 2935 struct mm_struct *mm,
2905 */ 2936 unsigned long addr, unsigned long len,
2906static void special_mapping_close(struct vm_area_struct *vma) 2937 unsigned long vm_flags, const struct vm_operations_struct *ops,
2907{ 2938 void *priv)
2908}
2909
2910static const struct vm_operations_struct special_mapping_vmops = {
2911 .close = special_mapping_close,
2912 .fault = special_mapping_fault,
2913};
2914
2915/*
2916 * Called with mm->mmap_sem held for writing.
2917 * Insert a new vma covering the given region, with the given flags.
2918 * Its pages are supplied by the given array of struct page *.
2919 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
2920 * The region past the last page supplied will always produce SIGBUS.
2921 * The array pointer and the pages it points to are assumed to stay alive
2922 * for as long as this mapping might exist.
2923 */
2924struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
2925 unsigned long addr, unsigned long len,
2926 unsigned long vm_flags, struct page **pages)
2927{ 2939{
2928 int ret; 2940 int ret;
2929 struct vm_area_struct *vma; 2941 struct vm_area_struct *vma;
@@ -2940,8 +2952,8 @@ struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
2940 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; 2952 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
2941 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 2953 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2942 2954
2943 vma->vm_ops = &special_mapping_vmops; 2955 vma->vm_ops = ops;
2944 vma->vm_private_data = pages; 2956 vma->vm_private_data = priv;
2945 2957
2946 ret = insert_vm_struct(mm, vma); 2958 ret = insert_vm_struct(mm, vma);
2947 if (ret) 2959 if (ret)
@@ -2958,12 +2970,31 @@ out:
2958 return ERR_PTR(ret); 2970 return ERR_PTR(ret);
2959} 2971}
2960 2972
2973/*
2974 * Called with mm->mmap_sem held for writing.
2975 * Insert a new vma covering the given region, with the given flags.
2976 * Its pages are supplied by the given array of struct page *.
2977 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
2978 * The region past the last page supplied will always produce SIGBUS.
2979 * The array pointer and the pages it points to are assumed to stay alive
2980 * for as long as this mapping might exist.
2981 */
2982struct vm_area_struct *_install_special_mapping(
2983 struct mm_struct *mm,
2984 unsigned long addr, unsigned long len,
2985 unsigned long vm_flags, const struct vm_special_mapping *spec)
2986{
2987 return __install_special_mapping(mm, addr, len, vm_flags,
2988 &special_mapping_vmops, (void *)spec);
2989}
2990
2961int install_special_mapping(struct mm_struct *mm, 2991int install_special_mapping(struct mm_struct *mm,
2962 unsigned long addr, unsigned long len, 2992 unsigned long addr, unsigned long len,
2963 unsigned long vm_flags, struct page **pages) 2993 unsigned long vm_flags, struct page **pages)
2964{ 2994{
2965 struct vm_area_struct *vma = _install_special_mapping(mm, 2995 struct vm_area_struct *vma = __install_special_mapping(
2966 addr, len, vm_flags, pages); 2996 mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
2997 (void *)pages);
2967 2998
2968 if (IS_ERR(vma)) 2999 if (IS_ERR(vma))
2969 return PTR_ERR(vma); 3000 return PTR_ERR(vma);