diff options
author | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-16 12:44:32 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-16 12:44:32 -0500 |
commit | 2f77d107050abc14bc393b34bdb7b91cf670c250 (patch) | |
tree | 6651586fb1b10f60cd6acdb3222bafac9c2d7aa8 | |
parent | 0221872a3b0aa2fa2f3fa60affcbaebd662c4a90 (diff) |
Fix incorrect user space access locking in mincore()
Doug Chapman noticed that mincore() will doa "copy_to_user()" of the
result while holding the mmap semaphore for reading, which is a big
no-no. While a recursive read-lock on a semaphore in the case of a page
fault happens to work, we don't actually allow them due to deadlock
schenarios with writers due to fairness issues.
Doug and Marcel sent in a patch to fix it, but I decided to just rewrite
the mess instead - not just fixing the locking problem, but making the
code smaller and (imho) much easier to understand.
Cc: Doug Chapman <dchapman@redhat.com>
Cc: Marcel Holtmann <holtmann@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | mm/mincore.c | 190 |
1 files changed, 86 insertions, 104 deletions
diff --git a/mm/mincore.c b/mm/mincore.c index 72890780c1c9..b44d7f875cb6 100644 --- a/mm/mincore.c +++ b/mm/mincore.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * linux/mm/mincore.c | 2 | * linux/mm/mincore.c |
3 | * | 3 | * |
4 | * Copyright (C) 1994-1999 Linus Torvalds | 4 | * Copyright (C) 1994-2006 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* | 7 | /* |
@@ -38,46 +38,60 @@ static unsigned char mincore_page(struct vm_area_struct * vma, | |||
38 | return present; | 38 | return present; |
39 | } | 39 | } |
40 | 40 | ||
41 | static long mincore_vma(struct vm_area_struct * vma, | 41 | /* |
42 | unsigned long start, unsigned long end, unsigned char __user * vec) | 42 | * Do a chunk of "sys_mincore()". We've already checked |
43 | * all the arguments, we hold the mmap semaphore: we should | ||
44 | * just return the amount of info we're asked for. | ||
45 | */ | ||
46 | static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages) | ||
43 | { | 47 | { |
44 | long error, i, remaining; | 48 | unsigned long i, nr, pgoff; |
45 | unsigned char * tmp; | 49 | struct vm_area_struct *vma = find_vma(current->mm, addr); |
46 | 50 | ||
47 | error = -ENOMEM; | 51 | /* |
48 | if (!vma->vm_file) | 52 | * find_vma() didn't find anything: the address |
49 | return error; | 53 | * is above everything we have mapped. |
50 | 54 | */ | |
51 | start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 55 | if (!vma) { |
52 | if (end > vma->vm_end) | 56 | memset(vec, 0, pages); |
53 | end = vma->vm_end; | 57 | return pages; |
54 | end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 58 | } |
55 | 59 | ||
56 | error = -EAGAIN; | 60 | /* |
57 | tmp = (unsigned char *) __get_free_page(GFP_KERNEL); | 61 | * find_vma() found something, but we might be |
58 | if (!tmp) | 62 | * below it: check for that. |
59 | return error; | 63 | */ |
64 | if (addr < vma->vm_start) { | ||
65 | unsigned long gap = (vma->vm_start - addr) >> PAGE_SHIFT; | ||
66 | if (gap > pages) | ||
67 | gap = pages; | ||
68 | memset(vec, 0, gap); | ||
69 | return gap; | ||
70 | } | ||
60 | 71 | ||
61 | /* (end - start) is # of pages, and also # of bytes in "vec */ | 72 | /* |
62 | remaining = (end - start), | 73 | * Ok, got it. But check whether it's a segment we support |
74 | * mincore() on. Right now, we don't do any anonymous mappings. | ||
75 | */ | ||
76 | if (!vma->vm_file) | ||
77 | return -ENOMEM; | ||
63 | 78 | ||
64 | error = 0; | 79 | /* |
65 | for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) { | 80 | * Calculate how many pages there are left in the vma, and |
66 | int j = 0; | 81 | * what the pgoff is for our address. |
67 | long thispiece = (remaining < PAGE_SIZE) ? | 82 | */ |
68 | remaining : PAGE_SIZE; | 83 | nr = (vma->vm_end - addr) >> PAGE_SHIFT; |
84 | if (nr > pages) | ||
85 | nr = pages; | ||
69 | 86 | ||
70 | while (j < thispiece) | 87 | pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; |
71 | tmp[j++] = mincore_page(vma, start++); | 88 | pgoff += vma->vm_pgoff; |
72 | 89 | ||
73 | if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) { | 90 | /* And then we just fill the sucker in.. */ |
74 | error = -EFAULT; | 91 | for (i = 0 ; i < nr; i++, pgoff++) |
75 | break; | 92 | vec[i] = mincore_page(vma, pgoff); |
76 | } | ||
77 | } | ||
78 | 93 | ||
79 | free_page((unsigned long) tmp); | 94 | return nr; |
80 | return error; | ||
81 | } | 95 | } |
82 | 96 | ||
83 | /* | 97 | /* |
@@ -107,82 +121,50 @@ static long mincore_vma(struct vm_area_struct * vma, | |||
107 | asmlinkage long sys_mincore(unsigned long start, size_t len, | 121 | asmlinkage long sys_mincore(unsigned long start, size_t len, |
108 | unsigned char __user * vec) | 122 | unsigned char __user * vec) |
109 | { | 123 | { |
110 | int index = 0; | 124 | long retval; |
111 | unsigned long end, limit; | 125 | unsigned long pages; |
112 | struct vm_area_struct * vma; | 126 | unsigned char *tmp; |
113 | size_t max; | ||
114 | int unmapped_error = 0; | ||
115 | long error; | ||
116 | |||
117 | /* check the arguments */ | ||
118 | if (start & ~PAGE_CACHE_MASK) | ||
119 | goto einval; | ||
120 | |||
121 | limit = TASK_SIZE; | ||
122 | if (start >= limit) | ||
123 | goto enomem; | ||
124 | |||
125 | if (!len) | ||
126 | return 0; | ||
127 | 127 | ||
128 | max = limit - start; | 128 | /* Check the start address: needs to be page-aligned.. */ |
129 | len = PAGE_CACHE_ALIGN(len); | 129 | if (start & ~PAGE_CACHE_MASK) |
130 | if (len > max || !len) | 130 | return -EINVAL; |
131 | goto enomem; | ||
132 | |||
133 | end = start + len; | ||
134 | 131 | ||
135 | /* check the output buffer whilst holding the lock */ | 132 | /* ..and we need to be passed a valid user-space range */ |
136 | error = -EFAULT; | 133 | if (!access_ok(VERIFY_READ, (void __user *) start, len)) |
137 | down_read(¤t->mm->mmap_sem); | 134 | return -ENOMEM; |
138 | 135 | ||
139 | if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT)) | 136 | /* This also avoids any overflows on PAGE_CACHE_ALIGN */ |
140 | goto out; | 137 | pages = len >> PAGE_SHIFT; |
138 | pages += (len & ~PAGE_MASK) != 0; | ||
141 | 139 | ||
142 | /* | 140 | if (!access_ok(VERIFY_WRITE, vec, pages)) |
143 | * If the interval [start,end) covers some unmapped address | 141 | return -EFAULT; |
144 | * ranges, just ignore them, but return -ENOMEM at the end. | ||
145 | */ | ||
146 | error = 0; | ||
147 | |||
148 | vma = find_vma(current->mm, start); | ||
149 | while (vma) { | ||
150 | /* Here start < vma->vm_end. */ | ||
151 | if (start < vma->vm_start) { | ||
152 | unmapped_error = -ENOMEM; | ||
153 | start = vma->vm_start; | ||
154 | } | ||
155 | 142 | ||
156 | /* Here vma->vm_start <= start < vma->vm_end. */ | 143 | tmp = (void *) __get_free_page(GFP_USER); |
157 | if (end <= vma->vm_end) { | 144 | if (!tmp) |
158 | if (start < end) { | 145 | return -ENOMEM; |
159 | error = mincore_vma(vma, start, end, | 146 | |
160 | &vec[index]); | 147 | retval = 0; |
161 | if (error) | 148 | while (pages) { |
162 | goto out; | 149 | /* |
163 | } | 150 | * Do at most PAGE_SIZE entries per iteration, due to |
164 | error = unmapped_error; | 151 | * the temporary buffer size. |
165 | goto out; | 152 | */ |
153 | down_read(¤t->mm->mmap_sem); | ||
154 | retval = do_mincore(start, tmp, max(pages, PAGE_SIZE)); | ||
155 | up_read(¤t->mm->mmap_sem); | ||
156 | |||
157 | if (retval <= 0) | ||
158 | break; | ||
159 | if (copy_to_user(vec, tmp, retval)) { | ||
160 | retval = -EFAULT; | ||
161 | break; | ||
166 | } | 162 | } |
167 | 163 | pages -= retval; | |
168 | /* Here vma->vm_start <= start < vma->vm_end < end. */ | 164 | vec += retval; |
169 | error = mincore_vma(vma, start, vma->vm_end, &vec[index]); | 165 | start += retval << PAGE_SHIFT; |
170 | if (error) | 166 | retval = 0; |
171 | goto out; | ||
172 | index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; | ||
173 | start = vma->vm_end; | ||
174 | vma = vma->vm_next; | ||
175 | } | 167 | } |
176 | 168 | free_page((unsigned long) tmp); | |
177 | /* we found a hole in the area queried if we arrive here */ | 169 | return retval; |
178 | error = -ENOMEM; | ||
179 | |||
180 | out: | ||
181 | up_read(¤t->mm->mmap_sem); | ||
182 | return error; | ||
183 | |||
184 | einval: | ||
185 | return -EINVAL; | ||
186 | enomem: | ||
187 | return -ENOMEM; | ||
188 | } | 170 | } |