Diffstat (limited to 'mm/madvise.c')
-rw-r--r--  mm/madvise.c | 103
1 file changed, 65 insertions(+), 38 deletions(-)
diff --git a/mm/madvise.c b/mm/madvise.c
index 944b5e52d812..e3108054733c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -8,17 +8,47 @@
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
+#include <linux/mempolicy.h>
 #include <linux/hugetlb.h>
 
 /*
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
  */
-static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
-		unsigned long end, int behavior)
+static long madvise_behavior(struct vm_area_struct * vma,
+		     struct vm_area_struct **prev,
+		     unsigned long start, unsigned long end, int behavior)
 {
 	struct mm_struct * mm = vma->vm_mm;
 	int error = 0;
+	pgoff_t pgoff;
+	int new_flags = vma->vm_flags & ~VM_READHINTMASK;
+
+	switch (behavior) {
+	case MADV_SEQUENTIAL:
+		new_flags |= VM_SEQ_READ;
+		break;
+	case MADV_RANDOM:
+		new_flags |= VM_RAND_READ;
+		break;
+	default:
+		break;
+	}
+
+	if (new_flags == vma->vm_flags) {
+		*prev = vma;
+		goto success;
+	}
+
+	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
+				vma->vm_file, pgoff, vma_policy(vma));
+	if (*prev) {
+		vma = *prev;
+		goto success;
+	}
+
+	*prev = vma;
 
 	if (start != vma->vm_start) {
 		error = split_vma(mm, vma, start, 1);
@@ -36,21 +66,12 @@ static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	VM_ClearReadHint(vma);
-
-	switch (behavior) {
-	case MADV_SEQUENTIAL:
-		vma->vm_flags |= VM_SEQ_READ;
-		break;
-	case MADV_RANDOM:
-		vma->vm_flags |= VM_RAND_READ;
-		break;
-	default:
-		break;
-	}
+	vma->vm_flags = new_flags;
 
 out:
 	if (error == -ENOMEM)
 		error = -EAGAIN;
+success:
 	return error;
 }
 
@@ -58,6 +79,7 @@ out:
  * Schedule all required I/O operations. Do not wait for completion.
  */
 static long madvise_willneed(struct vm_area_struct * vma,
+			     struct vm_area_struct ** prev,
 			     unsigned long start, unsigned long end)
 {
 	struct file *file = vma->vm_file;
@@ -65,6 +87,7 @@ static long madvise_willneed(struct vm_area_struct * vma,
 	if (!file)
 		return -EBADF;
 
+	*prev = vma;
 	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	if (end > vma->vm_end)
 		end = vma->vm_end;
@@ -95,8 +118,10 @@ static long madvise_willneed(struct vm_area_struct * vma,
  * dirty pages is already available as msync(MS_INVALIDATE).
  */
 static long madvise_dontneed(struct vm_area_struct * vma,
+			     struct vm_area_struct ** prev,
 			     unsigned long start, unsigned long end)
 {
+	*prev = vma;
 	if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
 		return -EINVAL;
 
@@ -111,8 +136,8 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 	return 0;
 }
 
-static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
-		unsigned long end, int behavior)
+static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
+			unsigned long start, unsigned long end, int behavior)
 {
 	long error = -EBADF;
 
@@ -120,15 +145,15 @@ static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
 	case MADV_NORMAL:
 	case MADV_SEQUENTIAL:
 	case MADV_RANDOM:
-		error = madvise_behavior(vma, start, end, behavior);
+		error = madvise_behavior(vma, prev, start, end, behavior);
 		break;
 
 	case MADV_WILLNEED:
-		error = madvise_willneed(vma, start, end);
+		error = madvise_willneed(vma, prev, start, end);
 		break;
 
 	case MADV_DONTNEED:
-		error = madvise_dontneed(vma, start, end);
+		error = madvise_dontneed(vma, prev, start, end);
 		break;
 
 	default:
@@ -175,8 +200,8 @@ static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
  */
 asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 {
-	unsigned long end;
-	struct vm_area_struct * vma;
+	unsigned long end, tmp;
+	struct vm_area_struct * vma, *prev;
 	int unmapped_error = 0;
 	int error = -EINVAL;
 	size_t len;
@@ -202,40 +227,42 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 	/*
 	 * If the interval [start,end) covers some unmapped address
 	 * ranges, just ignore them, but return -ENOMEM at the end.
+	 * - different from the way of handling in mlock etc.
 	 */
-	vma = find_vma(current->mm, start);
+	vma = find_vma_prev(current->mm, start, &prev);
+	if (!vma && prev)
+		vma = prev->vm_next;
 	for (;;) {
 		/* Still start < end. */
 		error = -ENOMEM;
 		if (!vma)
 			goto out;
 
-		/* Here start < vma->vm_end. */
+		/* Here start < (end|vma->vm_end). */
 		if (start < vma->vm_start) {
 			unmapped_error = -ENOMEM;
 			start = vma->vm_start;
+			if (start >= end)
+				goto out;
 		}
 
-		/* Here vma->vm_start <= start < vma->vm_end. */
-		if (end <= vma->vm_end) {
-			if (start < end) {
-				error = madvise_vma(vma, start, end,
-							behavior);
-				if (error)
-					goto out;
-			}
-			error = unmapped_error;
-			goto out;
-		}
+		/* Here vma->vm_start <= start < (end|vma->vm_end) */
+		tmp = vma->vm_end;
+		if (end < tmp)
+			tmp = end;
 
-		/* Here vma->vm_start <= start < vma->vm_end < end. */
-		error = madvise_vma(vma, start, vma->vm_end, behavior);
+		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+		error = madvise_vma(vma, &prev, start, tmp, behavior);
 		if (error)
 			goto out;
-		start = vma->vm_end;
-		vma = vma->vm_next;
+		start = tmp;
+		if (start < prev->vm_end)
+			start = prev->vm_end;
+		error = unmapped_error;
+		if (start >= end)
+			goto out;
+		vma = prev->vm_next;
 	}
-
 out:
 	up_write(&current->mm->mmap_sem);
 	return error;
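
For illustration only (not part of the commit above): a minimal userspace sketch of the access-hint pattern that the new vma_merge() call in madvise_behavior() is meant to clean up after. Hinting only part of a mapping forces a VMA split; once a uniform hint is applied again, adjacent areas whose flags end up identical can merge back instead of accumulating. The mapping size and hint choices below are arbitrary.

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 16 * page;
	char *buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Hint a different access pattern for the middle of the mapping;
	 * the kernel has to split the VMA around the advised range. */
	if (madvise(buf + 4 * page, 8 * page, MADV_RANDOM))
		perror("madvise(MADV_RANDOM)");

	/* Re-apply one hint across the whole range; with the change above,
	 * madvise_behavior() can merge the split VMAs back together. */
	if (madvise(buf, len, MADV_NORMAL))
		perror("madvise(MADV_NORMAL)");

	munmap(buf, len);
	return EXIT_SUCCESS;
}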