Diffstat (limited to 'mm/msync.c')
-rw-r--r--	mm/msync.c	139
1 files changed, 79 insertions, 60 deletions
diff --git a/mm/msync.c b/mm/msync.c
index 3563a56e1a51..bc6c95376366 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -9,20 +9,24 @@
  */
 #include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
+#include <linux/writeback.h>
+#include <linux/file.h>
 #include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end)
 {
 	pte_t *pte;
 	spinlock_t *ptl;
 	int progress = 0;
+	unsigned long ret = 0;
 
 again:
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -43,58 +47,64 @@ again:
 		if (!page)
 			continue;
 		if (ptep_clear_flush_dirty(vma, addr, pte) ||
 		    page_test_and_clear_dirty(page))
-			set_page_dirty(page);
+			ret += set_page_dirty(page);
 		progress += 3;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	if (addr != end)
 		goto again;
+	return ret;
 }
 
-static inline void msync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-			unsigned long addr, unsigned long end)
+static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
+			pud_t *pud, unsigned long addr, unsigned long end)
 {
 	pmd_t *pmd;
 	unsigned long next;
+	unsigned long ret = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		msync_pte_range(vma, pmd, addr, next);
+		ret += msync_pte_range(vma, pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
+	return ret;
 }
 
-static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-			unsigned long addr, unsigned long end)
+static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
+			pgd_t *pgd, unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
+	unsigned long ret = 0;
 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		msync_pmd_range(vma, pud, addr, next);
+		ret += msync_pmd_range(vma, pud, addr, next);
 	} while (pud++, addr = next, addr != end);
+	return ret;
 }
 
-static void msync_page_range(struct vm_area_struct *vma,
+static unsigned long msync_page_range(struct vm_area_struct *vma,
 			unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
 	unsigned long next;
+	unsigned long ret = 0;
 
 	/* For hugepages we can't go walking the page table normally,
 	 * but that's ok, hugetlbfs is memory based, so we don't need
 	 * to do anything more on an msync().
 	 */
 	if (vma->vm_flags & VM_HUGETLB)
-		return;
+		return 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(vma->vm_mm, addr);
@@ -103,8 +113,9 @@ static void msync_page_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		msync_pud_range(vma, pgd, addr, next);
+		ret += msync_pud_range(vma, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
+	return ret;
 }
 
 /*
@@ -115,53 +126,31 @@ static void msync_page_range(struct vm_area_struct *vma,
  * write out the dirty pages and wait on the writeout and check the result.
  * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
  * async writeout immediately.
- * So my _not_ starting I/O in MS_ASYNC we provide complete flexibility to
+ * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
-static int msync_interval(struct vm_area_struct *vma,
-			unsigned long addr, unsigned long end, int flags)
+static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long end, int flags,
+			unsigned long *nr_pages_dirtied)
 {
-	int ret = 0;
 	struct file *file = vma->vm_file;
 
 	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
 		return -EBUSY;
 
-	if (file && (vma->vm_flags & VM_SHARED)) {
-		msync_page_range(vma, addr, end);
-
-		if (flags & MS_SYNC) {
-			struct address_space *mapping = file->f_mapping;
-			int err;
-
-			ret = filemap_fdatawrite(mapping);
-			if (file->f_op && file->f_op->fsync) {
-				/*
-				 * We don't take i_mutex here because mmap_sem
-				 * is already held.
-				 */
-				err = file->f_op->fsync(file,file->f_dentry,1);
-				if (err && !ret)
-					ret = err;
-			}
-			err = filemap_fdatawait(mapping);
-			if (!ret)
-				ret = err;
-		}
-	}
-	return ret;
+	if (file && (vma->vm_flags & VM_SHARED))
+		*nr_pages_dirtied = msync_page_range(vma, addr, end);
+	return 0;
 }
 
 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 {
 	unsigned long end;
 	struct vm_area_struct *vma;
-	int unmapped_error, error = -EINVAL;
-
-	if (flags & MS_SYNC)
-		current->flags |= PF_SYNCWRITE;
+	int unmapped_error = 0;
+	int error = -EINVAL;
+	int done = 0;
 
-	down_read(&current->mm->mmap_sem);
 	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 		goto out;
 	if (start & ~PAGE_MASK)
@@ -180,13 +169,18 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 	 * If the interval [start,end) covers some unmapped address ranges,
 	 * just ignore them, but return -ENOMEM at the end.
 	 */
+	down_read(&current->mm->mmap_sem);
+	if (flags & MS_SYNC)
+		current->flags |= PF_SYNCWRITE;
 	vma = find_vma(current->mm, start);
-	unmapped_error = 0;
-	for (;;) {
-		/* Still start < end. */
+	if (!vma) {
 		error = -ENOMEM;
-		if (!vma)
-			goto out;
+		goto out_unlock;
+	}
+	do {
+		unsigned long nr_pages_dirtied = 0;
+		struct file *file;
+
 		/* Here start < vma->vm_end. */
 		if (start < vma->vm_start) {
 			unmapped_error = -ENOMEM;
@@ -195,22 +189,47 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 		/* Here vma->vm_start <= start < vma->vm_end. */
 		if (end <= vma->vm_end) {
 			if (start < end) {
-				error = msync_interval(vma, start, end, flags);
+				error = msync_interval(vma, start, end, flags,
+							&nr_pages_dirtied);
 				if (error)
-					goto out;
+					goto out_unlock;
 			}
 			error = unmapped_error;
-			goto out;
+			done = 1;
+		} else {
+			/* Here vma->vm_start <= start < vma->vm_end < end. */
+			error = msync_interval(vma, start, vma->vm_end, flags,
+						&nr_pages_dirtied);
+			if (error)
+				goto out_unlock;
 		}
-		/* Here vma->vm_start <= start < vma->vm_end < end. */
-		error = msync_interval(vma, start, vma->vm_end, flags);
-		if (error)
-			goto out;
+		file = vma->vm_file;
 		start = vma->vm_end;
-		vma = vma->vm_next;
-	}
-out:
-	up_read(&current->mm->mmap_sem);
+		if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
+			get_file(file);
+			up_read(&current->mm->mmap_sem);
+			balance_dirty_pages_ratelimited_nr(file->f_mapping,
+							nr_pages_dirtied);
+			fput(file);
+			down_read(&current->mm->mmap_sem);
+			vma = find_vma(current->mm, start);
+		} else if ((flags & MS_SYNC) && file &&
+				(vma->vm_flags & VM_SHARED)) {
+			get_file(file);
+			up_read(&current->mm->mmap_sem);
+			error = do_fsync(file, 0);
+			fput(file);
+			down_read(&current->mm->mmap_sem);
+			if (error)
+				goto out_unlock;
+			vma = find_vma(current->mm, start);
+		} else {
+			vma = vma->vm_next;
+		}
+	} while (vma && !done);
+out_unlock:
 	current->flags &= ~PF_SYNCWRITE;
+	up_read(&current->mm->mmap_sem);
+out:
 	return error;
 }
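
The comment retained in msync_interval() is the key to the change: MS_ASYNC deliberately starts no I/O, while the new sys_msync() loop throttles MS_ASYNC callers via balance_dirty_pages_ratelimited_nr() and pushes MS_SYNC through do_fsync() per shared file-backed VMA. The following user-space sketch (not part of the patch; file name and sizes are arbitrary example values) illustrates what that means for callers:

/*
 * Hypothetical user-space illustration: MS_ASYNC only marks/accounts the
 * dirtied pages, MS_SYNC waits for writeout of the backing file.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 4096 * 16;	/* 16 pages, example size */
	int fd = open("/tmp/msync-demo", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || ftruncate(fd, len) < 0) {
		perror("open/ftruncate");
		return EXIT_FAILURE;
	}

	char *map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	memset(map, 'x', len);		/* dirty every page of the mapping */

	/*
	 * MS_ASYNC: with this patch the kernel counts the dirtied pages and
	 * may throttle the caller, but starts no writeout; an application
	 * wanting async writeout can follow up with its own fsync() or
	 * posix_fadvise(POSIX_FADV_DONTNEED) on fd.
	 */
	if (msync(map, len, MS_ASYNC) < 0)
		perror("msync(MS_ASYNC)");

	/*
	 * MS_SYNC: the patched sys_msync() runs do_fsync() on the backing
	 * file, so this returns only after the data has been written out.
	 */
	if (msync(map, len, MS_SYNC) < 0)
		perror("msync(MS_SYNC)");

	munmap(map, len);
	close(fd);
	return EXIT_SUCCESS;
}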