author:    Nick Piggin <nickpiggin@yahoo.com.au>  2005-10-29 21:16:12 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org>  2005-10-30 00:40:39 -0400
commit:    b5810039a54e5babf428e9a1e89fc1940fabff11
tree:      835836cb527ec9bd525f93eb7e016f3dfb8c8ae2 /mm/mempolicy.c
parent:    f9c98d0287de42221c624482fd4f8d485c98ab22
[PATCH] core remove PageReserved
Remove PageReserved() calls from core code by tightening VM_RESERVED
handling in mm/ to cover PageReserved functionality.
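As context for the VM_RESERVED tightening, here is a minimal sketch (not part of this patch; the driver and function names are hypothetical) of how a region ends up VM_RESERVED in the first place: a driver's mmap handler sets the flag on the vma when it maps special pages, and mm/ now keys off that flag rather than PageReserved().

```c
#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical driver mmap handler: maps device pages with
 * remap_pfn_range() and marks the vma VM_RESERVED so the core VM
 * never applies refcount-based handling to these pages.
 * (remap_pfn_range() of this era also sets the flag itself.) */
static int exampledrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	vma->vm_flags |= VM_RESERVED;
	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
			       vma->vm_page_prot) ? -EAGAIN : 0;
}
```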
PageReserved special casing is removed from get_page and put_page.
All setting and clearing of PageReserved is retained, and it is now flagged
in the page_alloc checks to help ensure we don't introduce any refcount-based
freeing of Reserved pages.
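What "flagged in the page_alloc checks" amounts to, as a rough sketch (a hypothetical helper, not the literal mm/page_alloc.c hunk): the free path treats a set PG_reserved bit as a bad-page condition and refuses to free, rather than letting a stray refcount release a Reserved page.

```c
#include <linux/mm.h>
#include <linux/kernel.h>

/* Hypothetical sketch of the idea: a Reserved page reaching the free
 * path is a bug, so report it and tell the caller to leak the page
 * instead of freeing it. */
static inline int reserved_page_free_is_bad(struct page *page)
{
	if (PageReserved(page)) {
		printk(KERN_ERR "Bad page state: freeing PG_reserved page\n");
		return 1;	/* caller should not free this page */
	}
	return 0;
}
```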
MAP_PRIVATE, PROT_WRITE mapping of VM_RESERVED regions is tentatively being
deprecated. We never handled it completely correctly anyway, and it can be
reintroduced in the future if required (Hugh has a proof of concept).
Once the PageReserved() calls are removed from kernel/power/swsusp.c and from
all arch/ and driver code, the SetPageReserved/ClearPageReserved calls and the
PG_reserved bit itself can be trivially removed.
The last real user of PageReserved is swsusp, which uses PageReserved to
determine whether a struct page points to valid memory or not. This still
needs to be addressed (a generic page_is_ram() should work).
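A rough sketch of what such a generic page_is_ram() could look like (hypothetical, not part of this patch; a real version would take resource_lock and handle nested resources): check whether the pfn lies inside a top-level "System RAM" resource.

```c
#include <linux/ioport.h>
#include <linux/string.h>
#include <asm/page.h>

/* Hypothetical page_is_ram() sketch: a pfn is RAM if it falls inside a
 * top-level "System RAM" entry of the iomem resource tree.  Locking and
 * nested resources are ignored for brevity. */
static int page_is_ram_sketch(unsigned long pfn)
{
	unsigned long addr = pfn << PAGE_SHIFT;
	struct resource *res;

	for (res = iomem_resource.child; res; res = res->sibling) {
		if (!res->name || strcmp(res->name, "System RAM"))
			continue;
		if (addr >= res->start && addr + PAGE_SIZE - 1 <= res->end)
			return 1;
	}
	return 0;
}
```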
A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and
thus mapcounted and counted towards shared rss). These writes to the struct
page could cause excessive cacheline bouncing on big systems. There are a
number of ways this could be addressed if it is an issue.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Refcount bug fix for filemap_xip.c
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/mempolicy.c')

 mm/mempolicy.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 43b1199af591..11d824f282f1 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -223,13 +223,13 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 }
 
 /* Ensure all existing pages follow the policy. */
-static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pte_t *orig_pte;
 	pte_t *pte;
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&vma->vm_mm->page_table_lock);
 	orig_pte = pte = pte_offset_map(pmd, addr);
 	do {
 		unsigned long pfn;
@@ -238,18 +238,20 @@ static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		if (!pte_present(*pte))
 			continue;
 		pfn = pte_pfn(*pte);
-		if (!pfn_valid(pfn))
+		if (!pfn_valid(pfn)) {
+			print_bad_pte(vma, *pte, addr);
 			continue;
+		}
 		nid = pfn_to_nid(pfn);
 		if (!node_isset(nid, *nodes))
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(orig_pte);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&vma->vm_mm->page_table_lock);
 	return addr != end;
 }
 
-static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
+static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pmd_t *pmd;
@@ -260,13 +262,13 @@ static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (check_pte_range(mm, pmd, addr, next, nodes))
+		if (check_pte_range(vma, pmd, addr, next, nodes))
 			return -EIO;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pud_t *pud;
@@ -277,24 +279,24 @@ static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (check_pmd_range(mm, pud, addr, next, nodes))
+		if (check_pmd_range(vma, pud, addr, next, nodes))
 			return -EIO;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pgd_range(struct mm_struct *mm,
+static inline int check_pgd_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pgd_t *pgd;
 	unsigned long next;
 
-	pgd = pgd_offset(mm, addr);
+	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (check_pud_range(mm, pgd, addr, next, nodes))
+		if (check_pud_range(vma, pgd, addr, next, nodes))
 			return -EIO;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
@@ -311,6 +313,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
+	if (first->vm_flags & VM_RESERVED)
+		return ERR_PTR(-EACCES);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
 		if (!vma->vm_next && vma->vm_end < end)
@@ -323,8 +327,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		endvma = end;
 		if (vma->vm_start > start)
 			start = vma->vm_start;
-		err = check_pgd_range(vma->vm_mm,
-				start, endvma, nodes);
+		err = check_pgd_range(vma, start, endvma, nodes);
 		if (err) {
 			first = ERR_PTR(err);
 			break;