author     Nick Piggin <nickpiggin@yahoo.com.au>    2005-10-29 21:16:12 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>    2005-10-30 00:40:39 -0400
commit     b5810039a54e5babf428e9a1e89fc1940fabff11 (patch)
tree       835836cb527ec9bd525f93eb7e016f3dfb8c8ae2 /mm/mempolicy.c
parent     f9c98d0287de42221c624482fd4f8d485c98ab22 (diff)
[PATCH] core remove PageReserved
Remove PageReserved() calls from core code by tightening VM_RESERVED handling in mm/ to cover PageReserved functionality.

PageReserved special casing is removed from get_page and put_page.

All setting and clearing of PageReserved is retained, and it is now flagged in the page_alloc checks to help ensure we don't introduce any refcount based freeing of Reserved pages.

MAP_PRIVATE, PROT_WRITE of VM_RESERVED regions is tentatively being deprecated. We never completely handled it correctly anyway, and it can be reintroduced in future if required (Hugh has a proof of concept).

Once PageReserved() calls are removed from kernel/power/swsusp.c, and all arch/ and driver code, the Set and Clear calls, and the PG_reserved bit can be trivially removed.

Last real user of PageReserved is swsusp, which uses PageReserved to determine whether a struct page points to valid memory or not. This still needs to be addressed (a generic page_is_ram() should work).

A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and thus mapcounted and counted towards shared rss). These writes to the struct page could cause excessive cacheline bouncing on big systems. There are a number of ways this could be addressed if it is an issue.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Refcount bug fix for filemap_xip.c
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
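[Editor's note, not part of the commit: the changelog suggests that "a generic page_is_ram() should work" for the remaining swsusp use of PageReserved. One way such a helper could look, roughly, is a walk of the "System RAM" entries in the iomem resource tree. The sketch below is purely illustrative; the helper name, the unlocked walk of iomem_resource, and the exact range check are assumptions for the sake of the example, not code introduced by this patch.]

    #include <linux/ioport.h>
    #include <linux/string.h>
    #include <linux/mm.h>

    /*
     * Hypothetical sketch only: report whether a pfn lies inside a
     * "System RAM" resource.  Not part of this patch; a real version
     * would need locking around the resource tree and arch review.
     */
    static int page_is_ram(unsigned long pfn)
    {
    	unsigned long long addr = (unsigned long long)pfn << PAGE_SHIFT;
    	struct resource *res;

    	/* Walk the top-level iomem resources registered at boot. */
    	for (res = iomem_resource.child; res; res = res->sibling) {
    		if (res->name && !strcmp(res->name, "System RAM") &&
    		    addr >= res->start && addr + PAGE_SIZE - 1 <= res->end)
    			return 1;
    	}
    	return 0;
    }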
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--    mm/mempolicy.c    29
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 43b1199af591..11d824f282f1 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -223,13 +223,13 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 }
 
 /* Ensure all existing pages follow the policy. */
-static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pte_t *orig_pte;
 	pte_t *pte;
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&vma->vm_mm->page_table_lock);
 	orig_pte = pte = pte_offset_map(pmd, addr);
 	do {
 		unsigned long pfn;
@@ -238,18 +238,20 @@ static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		if (!pte_present(*pte))
 			continue;
 		pfn = pte_pfn(*pte);
-		if (!pfn_valid(pfn))
+		if (!pfn_valid(pfn)) {
+			print_bad_pte(vma, *pte, addr);
 			continue;
+		}
 		nid = pfn_to_nid(pfn);
 		if (!node_isset(nid, *nodes))
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(orig_pte);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&vma->vm_mm->page_table_lock);
 	return addr != end;
 }
 
-static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
+static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pmd_t *pmd;
@@ -260,13 +262,13 @@ static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (check_pte_range(mm, pmd, addr, next, nodes))
+		if (check_pte_range(vma, pmd, addr, next, nodes))
 			return -EIO;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pud_t *pud;
@@ -277,24 +279,24 @@ static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		if (check_pmd_range(mm, pud, addr, next, nodes))
+		if (check_pmd_range(vma, pud, addr, next, nodes))
 			return -EIO;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
-static inline int check_pgd_range(struct mm_struct *mm,
+static inline int check_pgd_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
 	pgd_t *pgd;
 	unsigned long next;
 
-	pgd = pgd_offset(mm, addr);
+	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (check_pud_range(mm, pgd, addr, next, nodes))
+		if (check_pud_range(vma, pgd, addr, next, nodes))
 			return -EIO;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
@@ -311,6 +313,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
+	if (first->vm_flags & VM_RESERVED)
+		return ERR_PTR(-EACCES);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
 		if (!vma->vm_next && vma->vm_end < end)
@@ -323,8 +327,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		endvma = end;
 		if (vma->vm_start > start)
 			start = vma->vm_start;
-		err = check_pgd_range(vma->vm_mm,
-				start, endvma, nodes);
+		err = check_pgd_range(vma, start, endvma, nodes);
 		if (err) {
 			first = ERR_PTR(err);
 			break;