Diffstat (limited to 'mm/madvise.c')
 mm/madvise.c | 103
 1 file changed, 65 insertions(+), 38 deletions(-)
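This patch makes madvise() coalesce adjacent VMAs that end up with identical read-hint flags (via vma_merge()) instead of leaving the address space fragmented by earlier splits, and threads a `prev` pointer through the helpers so the caller can resume iteration after a merge. As a rough userspace illustration of the pattern it optimizes (the mapping size and hint are arbitrary choices):

	/* Hypothetical demo: advising two adjacent halves of one mapping
	 * with the same hint. Before this patch the first call could split
	 * the VMA and leave two regions with identical flags; with
	 * vma_merge() they coalesce back into one. */
	#include <sys/mman.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		size_t len = 2 * 1024 * 1024;	/* arbitrary: 2 MiB */
		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return EXIT_FAILURE;
		}
		/* Advise each half separately; both end up VM_SEQ_READ. */
		if (madvise(p, len / 2, MADV_SEQUENTIAL) ||
		    madvise(p + len / 2, len / 2, MADV_SEQUENTIAL))
			perror("madvise");
		/* /proc/self/maps should now show one region, not two. */
		return 0;
	}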
diff --git a/mm/madvise.c b/mm/madvise.c
index 944b5e52d812..e3108054733c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -8,17 +8,47 @@
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
+#include <linux/mempolicy.h>
 #include <linux/hugetlb.h>
 
 /*
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
  */
-static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
-			     unsigned long end, int behavior)
+static long madvise_behavior(struct vm_area_struct * vma,
+			     struct vm_area_struct **prev,
+			     unsigned long start, unsigned long end, int behavior)
 {
 	struct mm_struct * mm = vma->vm_mm;
 	int error = 0;
+	pgoff_t pgoff;
+	int new_flags = vma->vm_flags & ~VM_READHINTMASK;
+
+	switch (behavior) {
+	case MADV_SEQUENTIAL:
+		new_flags |= VM_SEQ_READ;
+		break;
+	case MADV_RANDOM:
+		new_flags |= VM_RAND_READ;
+		break;
+	default:
+		break;
+	}
+
+	if (new_flags == vma->vm_flags) {
+		*prev = vma;
+		goto success;
+	}
+
+	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
+			  vma->vm_file, pgoff, vma_policy(vma));
+	if (*prev) {
+		vma = *prev;
+		goto success;
+	}
+
+	*prev = vma;
 
 	if (start != vma->vm_start) {
 		error = split_vma(mm, vma, start, 1);
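The pgoff handed to vma_merge() above must be the file page offset that corresponds to start, not to the VMA's own beginning. A small self-contained check of that arithmetic, with made-up numbers and a hard-coded 4 KiB page size:

	/* Worked example of the pgoff computation above; all values are
	 * illustrative and PAGE_SHIFT is assumed to be 12 (4 KiB pages). */
	#include <assert.h>

	int main(void)
	{
		unsigned long vm_start = 0x60000000;	/* VMA start address */
		unsigned long vm_pgoff = 16;		/* VMA maps file page 16 */
		unsigned long start    = 0x60003000;	/* madvise range start */

		/* 'start' is 3 pages past vm_start, so it corresponds to
		 * file page 16 + 3 = 19. */
		unsigned long pgoff = vm_pgoff + ((start - vm_start) >> 12);
		assert(pgoff == 19);
		return 0;
	}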
@@ -36,21 +66,12 @@ static long madvise_behavior(struct vm_area_struct * vma, unsigned long start,
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	VM_ClearReadHint(vma);
-
-	switch (behavior) {
-	case MADV_SEQUENTIAL:
-		vma->vm_flags |= VM_SEQ_READ;
-		break;
-	case MADV_RANDOM:
-		vma->vm_flags |= VM_RAND_READ;
-		break;
-	default:
-		break;
-	}
+	vma->vm_flags = new_flags;
 
 out:
 	if (error == -ENOMEM)
 		error = -EAGAIN;
+success:
 	return error;
 }
 
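Note that the out: path still reports a failed split (-ENOMEM from split_vma()) as -EAGAIN, which callers conventionally treat as transient. A sketch of the corresponding userspace retry idiom (the retry cap is an arbitrary choice):

	/* Sketch of retrying madvise() on EAGAIN, matching the
	 * -ENOMEM -> -EAGAIN mapping above. */
	#include <sys/mman.h>
	#include <errno.h>

	static int madvise_retry(void *addr, size_t len, int advice)
	{
		int tries = 3;	/* arbitrary cap to avoid spinning */

		while (madvise(addr, len, advice) != 0) {
			if (errno != EAGAIN || --tries == 0)
				return -1;
		}
		return 0;
	}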
@@ -58,6 +79,7 @@ out:
  * Schedule all required I/O operations. Do not wait for completion.
  */
 static long madvise_willneed(struct vm_area_struct * vma,
+			     struct vm_area_struct ** prev,
 			     unsigned long start, unsigned long end)
 {
 	struct file *file = vma->vm_file;
@@ -65,6 +87,7 @@ static long madvise_willneed(struct vm_area_struct * vma,
 	if (!file)
 		return -EBADF;
 
+	*prev = vma;
 	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	if (end > vma->vm_end)
 		end = vma->vm_end;
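For context, this function backs MADV_WILLNEED, which schedules readahead without waiting for it. A minimal, hypothetical example of using it to warm up a file mapping before a sequential scan (the path and scan loop are illustrative):

	#include <sys/mman.h>
	#include <sys/stat.h>
	#include <fcntl.h>
	#include <unistd.h>

	int scan_file(const char *path)	/* 'path' is illustrative */
	{
		struct stat st;
		int fd = open(path, O_RDONLY);
		if (fd < 0 || fstat(fd, &st) < 0)
			return -1;

		char *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED) {
			close(fd);
			return -1;
		}
		madvise(p, st.st_size, MADV_WILLNEED);	/* schedule I/O, don't wait */

		volatile long sum = 0;
		for (off_t i = 0; i < st.st_size; i++)
			sum += p[i];		/* pages are likely resident now */

		munmap(p, st.st_size);
		close(fd);
		return 0;
	}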
@@ -95,8 +118,10 @@ static long madvise_willneed(struct vm_area_struct * vma,
 	 * dirty pages is already available as msync(MS_INVALIDATE).
 	 */
 static long madvise_dontneed(struct vm_area_struct * vma,
+			     struct vm_area_struct ** prev,
 			     unsigned long start, unsigned long end)
 {
+	*prev = vma;
 	if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
 		return -EINVAL;
 
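The discard semantics implemented here are observable from userspace: on a private anonymous mapping (neither VM_LOCKED nor hugetlb, so the -EINVAL check above does not fire), pages read back zero-filled after MADV_DONTNEED. A small demo, assuming 4 KiB pages:

	#include <sys/mman.h>
	#include <string.h>
	#include <assert.h>

	int main(void)
	{
		size_t len = 4096;	/* one page, assuming 4 KiB pages */
		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		assert(p != MAP_FAILED);

		memset(p, 0xaa, len);
		assert(p[0] == (char)0xaa);

		assert(madvise(p, len, MADV_DONTNEED) == 0);
		assert(p[0] == 0);	/* contents discarded, fresh zero page */
		return 0;
	}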
@@ -111,8 +136,8 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 	return 0;
 }
 
-static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
-			unsigned long end, int behavior)
+static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
+			unsigned long start, unsigned long end, int behavior)
 {
 	long error = -EBADF;
 
@@ -120,15 +145,15 @@ static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
 	case MADV_NORMAL:
 	case MADV_SEQUENTIAL:
 	case MADV_RANDOM:
-		error = madvise_behavior(vma, start, end, behavior);
+		error = madvise_behavior(vma, prev, start, end, behavior);
 		break;
 
 	case MADV_WILLNEED:
-		error = madvise_willneed(vma, start, end);
+		error = madvise_willneed(vma, prev, start, end);
 		break;
 
 	case MADV_DONTNEED:
-		error = madvise_dontneed(vma, start, end);
+		error = madvise_dontneed(vma, prev, start, end);
 		break;
 
 	default:
@@ -175,8 +200,8 @@ static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
  */
 asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 {
-	unsigned long end;
-	struct vm_area_struct * vma;
+	unsigned long end, tmp;
+	struct vm_area_struct * vma, *prev;
 	int unmapped_error = 0;
 	int error = -EINVAL;
 	size_t len;
@@ -202,40 +227,42 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 	/*
 	 * If the interval [start,end) covers some unmapped address
 	 * ranges, just ignore them, but return -ENOMEM at the end.
+	 * - different from the way of handling in mlock etc.
 	 */
-	vma = find_vma(current->mm, start);
+	vma = find_vma_prev(current->mm, start, &prev);
+	if (!vma && prev)
+		vma = prev->vm_next;
 	for (;;) {
 		/* Still start < end. */
 		error = -ENOMEM;
 		if (!vma)
 			goto out;
 
-		/* Here start < vma->vm_end. */
+		/* Here start < (end|vma->vm_end). */
 		if (start < vma->vm_start) {
 			unmapped_error = -ENOMEM;
 			start = vma->vm_start;
+			if (start >= end)
+				goto out;
 		}
 
-		/* Here vma->vm_start <= start < vma->vm_end. */
-		if (end <= vma->vm_end) {
-			if (start < end) {
-				error = madvise_vma(vma, start, end,
-							behavior);
-				if (error)
-					goto out;
-			}
-			error = unmapped_error;
-			goto out;
-		}
+		/* Here vma->vm_start <= start < (end|vma->vm_end) */
+		tmp = vma->vm_end;
+		if (end < tmp)
+			tmp = end;
 
-		/* Here vma->vm_start <= start < vma->vm_end < end. */
-		error = madvise_vma(vma, start, vma->vm_end, behavior);
+		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+		error = madvise_vma(vma, &prev, start, tmp, behavior);
 		if (error)
 			goto out;
-		start = vma->vm_end;
-		vma = vma->vm_next;
+		start = tmp;
+		if (start < prev->vm_end)
+			start = prev->vm_end;
+		error = unmapped_error;
+		if (start >= end)
+			goto out;
+		vma = prev->vm_next;
 	}
-
 out:
 	up_write(&current->mm->mmap_sem);
 	return error;
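Because madvise_behavior() may merge the VMA it just handled into prev, the loop above re-derives its position from prev after every pass instead of trusting the stale vma pointer. A simplified standalone model of the traversal (plain list nodes stand in for the kernel's VMA structures, visit() plays the role of madvise_vma(), and -1 stands in for -ENOMEM):

	struct region {
		unsigned long start, end;
		struct region *next;
	};

	static int walk(struct region *vma, struct region *prev,
			unsigned long start, unsigned long end,
			int (*visit)(struct region *, struct region **,
				     unsigned long, unsigned long))
	{
		int unmapped_error = 0, error;
		unsigned long tmp;

		for (;;) {
			if (!vma)
				return -1;		/* ran past the last region */
			if (start < vma->start) {	/* hole: note it, skip ahead */
				unmapped_error = -1;
				start = vma->start;
				if (start >= end)
					return unmapped_error;
			}
			tmp = vma->end < end ? vma->end : end;	/* clamp to one region */
			error = visit(vma, &prev, start, tmp);	/* may rewrite prev */
			if (error)
				return error;
			start = tmp;
			if (start < prev->end)	/* a merge may have covered more */
				start = prev->end;
			if (start >= end)
				return unmapped_error;
			vma = prev->next;	/* resume from the (possibly merged) prev */
		}
	}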