aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mincore.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.osdl.org>2006-12-16 12:44:32 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-16 12:44:32 -0500
commit2f77d107050abc14bc393b34bdb7b91cf670c250 (patch)
tree6651586fb1b10f60cd6acdb3222bafac9c2d7aa8 /mm/mincore.c
parent0221872a3b0aa2fa2f3fa60affcbaebd662c4a90 (diff)
Fix incorrect user space access locking in mincore()
Doug Chapman noticed that mincore() will doa "copy_to_user()" of the result while holding the mmap semaphore for reading, which is a big no-no. While a recursive read-lock on a semaphore in the case of a page fault happens to work, we don't actually allow them due to deadlock schenarios with writers due to fairness issues. Doug and Marcel sent in a patch to fix it, but I decided to just rewrite the mess instead - not just fixing the locking problem, but making the code smaller and (imho) much easier to understand. Cc: Doug Chapman <dchapman@redhat.com> Cc: Marcel Holtmann <holtmann@redhat.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/mincore.c')
-rw-r--r--mm/mincore.c190
1 files changed, 86 insertions, 104 deletions
diff --git a/mm/mincore.c b/mm/mincore.c
index 72890780c1c9..b44d7f875cb6 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * linux/mm/mincore.c 2 * linux/mm/mincore.c
3 * 3 *
4 * Copyright (C) 1994-1999 Linus Torvalds 4 * Copyright (C) 1994-2006 Linus Torvalds
5 */ 5 */
6 6
7/* 7/*
@@ -38,46 +38,60 @@ static unsigned char mincore_page(struct vm_area_struct * vma,
38 return present; 38 return present;
39} 39}
40 40
41static long mincore_vma(struct vm_area_struct * vma, 41/*
42 unsigned long start, unsigned long end, unsigned char __user * vec) 42 * Do a chunk of "sys_mincore()". We've already checked
43 * all the arguments, we hold the mmap semaphore: we should
44 * just return the amount of info we're asked for.
45 */
46static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
43{ 47{
44 long error, i, remaining; 48 unsigned long i, nr, pgoff;
45 unsigned char * tmp; 49 struct vm_area_struct *vma = find_vma(current->mm, addr);
46 50
47 error = -ENOMEM; 51 /*
48 if (!vma->vm_file) 52 * find_vma() didn't find anything: the address
49 return error; 53 * is above everything we have mapped.
50 54 */
51 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 55 if (!vma) {
52 if (end > vma->vm_end) 56 memset(vec, 0, pages);
53 end = vma->vm_end; 57 return pages;
54 end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 58 }
55 59
56 error = -EAGAIN; 60 /*
57 tmp = (unsigned char *) __get_free_page(GFP_KERNEL); 61 * find_vma() found something, but we might be
58 if (!tmp) 62 * below it: check for that.
59 return error; 63 */
64 if (addr < vma->vm_start) {
65 unsigned long gap = (vma->vm_start - addr) >> PAGE_SHIFT;
66 if (gap > pages)
67 gap = pages;
68 memset(vec, 0, gap);
69 return gap;
70 }
60 71
61 /* (end - start) is # of pages, and also # of bytes in "vec */ 72 /*
62 remaining = (end - start), 73 * Ok, got it. But check whether it's a segment we support
74 * mincore() on. Right now, we don't do any anonymous mappings.
75 */
76 if (!vma->vm_file)
77 return -ENOMEM;
63 78
64 error = 0; 79 /*
65 for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) { 80 * Calculate how many pages there are left in the vma, and
66 int j = 0; 81 * what the pgoff is for our address.
67 long thispiece = (remaining < PAGE_SIZE) ? 82 */
68 remaining : PAGE_SIZE; 83 nr = (vma->vm_end - addr) >> PAGE_SHIFT;
84 if (nr > pages)
85 nr = pages;
69 86
70 while (j < thispiece) 87 pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
71 tmp[j++] = mincore_page(vma, start++); 88 pgoff += vma->vm_pgoff;
72 89
73 if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) { 90 /* And then we just fill the sucker in.. */
74 error = -EFAULT; 91 for (i = 0 ; i < nr; i++, pgoff++)
75 break; 92 vec[i] = mincore_page(vma, pgoff);
76 }
77 }
78 93
79 free_page((unsigned long) tmp); 94 return nr;
80 return error;
81} 95}
82 96
83/* 97/*
@@ -107,82 +121,50 @@ static long mincore_vma(struct vm_area_struct * vma,
107asmlinkage long sys_mincore(unsigned long start, size_t len, 121asmlinkage long sys_mincore(unsigned long start, size_t len,
108 unsigned char __user * vec) 122 unsigned char __user * vec)
109{ 123{
110 int index = 0; 124 long retval;
111 unsigned long end, limit; 125 unsigned long pages;
112 struct vm_area_struct * vma; 126 unsigned char *tmp;
113 size_t max;
114 int unmapped_error = 0;
115 long error;
116
117 /* check the arguments */
118 if (start & ~PAGE_CACHE_MASK)
119 goto einval;
120
121 limit = TASK_SIZE;
122 if (start >= limit)
123 goto enomem;
124
125 if (!len)
126 return 0;
127 127
128 max = limit - start; 128 /* Check the start address: needs to be page-aligned.. */
129 len = PAGE_CACHE_ALIGN(len); 129 if (start & ~PAGE_CACHE_MASK)
130 if (len > max || !len) 130 return -EINVAL;
131 goto enomem;
132
133 end = start + len;
134 131
135 /* check the output buffer whilst holding the lock */ 132 /* ..and we need to be passed a valid user-space range */
136 error = -EFAULT; 133 if (!access_ok(VERIFY_READ, (void __user *) start, len))
137 down_read(&current->mm->mmap_sem); 134 return -ENOMEM;
138 135
139 if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT)) 136 /* This also avoids any overflows on PAGE_CACHE_ALIGN */
140 goto out; 137 pages = len >> PAGE_SHIFT;
138 pages += (len & ~PAGE_MASK) != 0;
141 139
142 /* 140 if (!access_ok(VERIFY_WRITE, vec, pages))
143 * If the interval [start,end) covers some unmapped address 141 return -EFAULT;
144 * ranges, just ignore them, but return -ENOMEM at the end.
145 */
146 error = 0;
147
148 vma = find_vma(current->mm, start);
149 while (vma) {
150 /* Here start < vma->vm_end. */
151 if (start < vma->vm_start) {
152 unmapped_error = -ENOMEM;
153 start = vma->vm_start;
154 }
155 142
156 /* Here vma->vm_start <= start < vma->vm_end. */ 143 tmp = (void *) __get_free_page(GFP_USER);
157 if (end <= vma->vm_end) { 144 if (!tmp)
158 if (start < end) { 145 return -ENOMEM;
159 error = mincore_vma(vma, start, end, 146
160 &vec[index]); 147 retval = 0;
161 if (error) 148 while (pages) {
162 goto out; 149 /*
163 } 150 * Do at most PAGE_SIZE entries per iteration, due to
164 error = unmapped_error; 151 * the temporary buffer size.
165 goto out; 152 */
153 down_read(&current->mm->mmap_sem);
154 retval = do_mincore(start, tmp, max(pages, PAGE_SIZE));
155 up_read(&current->mm->mmap_sem);
156
157 if (retval <= 0)
158 break;
159 if (copy_to_user(vec, tmp, retval)) {
160 retval = -EFAULT;
161 break;
166 } 162 }
167 163 pages -= retval;
168 /* Here vma->vm_start <= start < vma->vm_end < end. */ 164 vec += retval;
169 error = mincore_vma(vma, start, vma->vm_end, &vec[index]); 165 start += retval << PAGE_SHIFT;
170 if (error) 166 retval = 0;
171 goto out;
172 index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
173 start = vma->vm_end;
174 vma = vma->vm_next;
175 } 167 }
176 168 free_page((unsigned long) tmp);
177 /* we found a hole in the area queried if we arrive here */ 169 return retval;
178 error = -ENOMEM;
179
180out:
181 up_read(&current->mm->mmap_sem);
182 return error;
183
184einval:
185 return -EINVAL;
186enomem:
187 return -ENOMEM;
188} 170}