-rw-r--r--	mm/msync.c	93
1 file changed, 61 insertions(+), 32 deletions(-)
diff --git a/mm/msync.c b/mm/msync.c
index 3563a56e1a51..8a66f5d5d4f0 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -12,17 +12,20 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
+#include <linux/writeback.h>
+#include <linux/file.h>
 #include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end)
 {
 	pte_t *pte;
 	spinlock_t *ptl;
 	int progress = 0;
+	unsigned long ret = 0;
 
 again:
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -43,58 +46,64 @@ again:
 		if (!page)
 			continue;
 		if (ptep_clear_flush_dirty(vma, addr, pte) ||
 				page_test_and_clear_dirty(page))
-			set_page_dirty(page);
+			ret += set_page_dirty(page);
 		progress += 3;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	if (addr != end)
 		goto again;
+	return ret;
 }
 
-static inline void msync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-			unsigned long addr, unsigned long end)
+static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
+			pud_t *pud, unsigned long addr, unsigned long end)
 {
 	pmd_t *pmd;
 	unsigned long next;
+	unsigned long ret = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		msync_pte_range(vma, pmd, addr, next);
+		ret += msync_pte_range(vma, pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
+	return ret;
 }
 
-static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-			unsigned long addr, unsigned long end)
+static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
+			pgd_t *pgd, unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
+	unsigned long ret = 0;
 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		msync_pmd_range(vma, pud, addr, next);
+		ret += msync_pmd_range(vma, pud, addr, next);
 	} while (pud++, addr = next, addr != end);
+	return ret;
 }
 
-static void msync_page_range(struct vm_area_struct *vma,
+static unsigned long msync_page_range(struct vm_area_struct *vma,
 			unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
 	unsigned long next;
+	unsigned long ret = 0;
 
 	/* For hugepages we can't go walking the page table normally,
 	 * but that's ok, hugetlbfs is memory based, so we don't need
 	 * to do anything more on an msync().
 	 */
 	if (vma->vm_flags & VM_HUGETLB)
-		return;
+		return 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(vma->vm_mm, addr);
@@ -103,8 +112,9 @@ static void msync_page_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		msync_pud_range(vma, pgd, addr, next);
+		ret += msync_pud_range(vma, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
+	return ret;
 }
 
 /*
@@ -118,8 +128,9 @@ static void msync_page_range(struct vm_area_struct *vma,
  * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
-static int msync_interval(struct vm_area_struct *vma,
-		unsigned long addr, unsigned long end, int flags)
+static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long end, int flags,
+			unsigned long *nr_pages_dirtied)
 {
 	int ret = 0;
 	struct file *file = vma->vm_file;
@@ -128,7 +139,7 @@ static int msync_interval(struct vm_area_struct *vma,
 		return -EBUSY;
 
 	if (file && (vma->vm_flags & VM_SHARED)) {
-		msync_page_range(vma, addr, end);
+		*nr_pages_dirtied = msync_page_range(vma, addr, end);
 
 		if (flags & MS_SYNC) {
 			struct address_space *mapping = file->f_mapping;
@@ -157,11 +168,8 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 	unsigned long end;
 	struct vm_area_struct *vma;
 	int unmapped_error, error = -EINVAL;
+	int done = 0;
 
-	if (flags & MS_SYNC)
-		current->flags |= PF_SYNCWRITE;
-
-	down_read(&current->mm->mmap_sem);
 	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 		goto out;
 	if (start & ~PAGE_MASK)
@@ -180,13 +188,19 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 	 * If the interval [start,end) covers some unmapped address ranges,
 	 * just ignore them, but return -ENOMEM at the end.
 	 */
+	down_read(&current->mm->mmap_sem);
+	if (flags & MS_SYNC)
+		current->flags |= PF_SYNCWRITE;
 	vma = find_vma(current->mm, start);
 	unmapped_error = 0;
-	for (;;) {
+	do {
+		unsigned long nr_pages_dirtied = 0;
+		struct file *file;
+
 		/* Still start < end. */
 		error = -ENOMEM;
 		if (!vma)
-			goto out;
+			goto out_unlock;
 		/* Here start < vma->vm_end. */
 		if (start < vma->vm_start) {
 			unmapped_error = -ENOMEM;
@@ -195,22 +209,37 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 		/* Here vma->vm_start <= start < vma->vm_end. */
 		if (end <= vma->vm_end) {
 			if (start < end) {
-				error = msync_interval(vma, start, end, flags);
+				error = msync_interval(vma, start, end, flags,
+							&nr_pages_dirtied);
 				if (error)
-					goto out;
+					goto out_unlock;
 			}
 			error = unmapped_error;
-			goto out;
+			done = 1;
+		} else {
+			/* Here vma->vm_start <= start < vma->vm_end < end. */
+			error = msync_interval(vma, start, vma->vm_end, flags,
+						&nr_pages_dirtied);
+			if (error)
+				goto out_unlock;
 		}
-		/* Here vma->vm_start <= start < vma->vm_end < end. */
-		error = msync_interval(vma, start, vma->vm_end, flags);
-		if (error)
-			goto out;
+		file = vma->vm_file;
 		start = vma->vm_end;
-		vma = vma->vm_next;
-	}
-out:
-	up_read(&current->mm->mmap_sem);
+		if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
+			get_file(file);
+			up_read(&current->mm->mmap_sem);
+			balance_dirty_pages_ratelimited_nr(file->f_mapping,
+							nr_pages_dirtied);
+			fput(file);
+			down_read(&current->mm->mmap_sem);
+			vma = find_vma(current->mm, start);
+		} else {
+			vma = vma->vm_next;
+		}
+	} while (!done);
+out_unlock:
 	current->flags &= ~PF_SYNCWRITE;
+	up_read(&current->mm->mmap_sem);
+out:
 	return error;
 }
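
For context on how userspace is expected to drive this path: the comment above msync_interval() notes that MS_ASYNC does not start I/O, it only propagates dirty state (and, with this patch, throttles the caller via balance_dirty_pages_ratelimited_nr() for the pages it dirtied), leaving writeout scheduling to the application. Below is a minimal userspace sketch of that pattern; it is illustrative only, not part of the patch, and the file path is a placeholder.

/* Illustrative only: the usage model described in the msync_interval()
 * comment.  MS_ASYNC just marks the mapped pages dirty; the application
 * then chooses when to start and wait for writeout (here with fsync()). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 4096;
	int fd = open("/tmp/msync-example.dat", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || ftruncate(fd, (off_t)len))
		return 1;

	char *map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	memcpy(map, "hello", 5);	/* dirty the shared mapping */

	/* MS_ASYNC: propagate dirty state, but start no I/O ... */
	if (msync(map, len, MS_ASYNC))
		perror("msync");

	/* ... the application decides when to write back and wait. */
	if (fsync(fd))
		perror("fsync");

	munmap(map, len);
	close(fd);
	return 0;
}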