Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig      |   6
-rw-r--r-- | mm/fremap.c     |  28
-rw-r--r-- | mm/hugetlb.c    |   6
-rw-r--r-- | mm/madvise.c    |   2
-rw-r--r-- | mm/memory.c     | 213
-rw-r--r-- | mm/mempolicy.c  |  12
-rw-r--r-- | mm/mmap.c       |  11
-rw-r--r-- | mm/mprotect.c   |   8
-rw-r--r-- | mm/msync.c      |  12
-rw-r--r-- | mm/nommu.c      |   2
-rw-r--r-- | mm/page_alloc.c |  75
-rw-r--r-- | mm/rmap.c       |  58
-rw-r--r-- | mm/swap.c       |   3
-rw-r--r-- | mm/thrash.c     |  10
-rw-r--r-- | mm/truncate.c   |   6
-rw-r--r-- | mm/vmscan.c     |  29
16 files changed, 273 insertions, 208 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index ae9ce6b73e8a..21eb51d4da8f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -125,12 +125,10 @@ comment "Memory hotplug is currently incompatible with Software Suspend" | |||
125 | # space can be handled with less contention: split it at this NR_CPUS. | 125 | # space can be handled with less contention: split it at this NR_CPUS. |
126 | # Default to 4 for wider testing, though 8 might be more appropriate. | 126 | # Default to 4 for wider testing, though 8 might be more appropriate. |
127 | # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. | 127 | # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. |
128 | # PA-RISC's debug spinlock_t is too large for the 32-bit struct page. | 128 | # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes. |
129 | # ARM26 and SPARC32 and PPC64 may use one page for multiple page tables. | ||
130 | # | 129 | # |
131 | config SPLIT_PTLOCK_CPUS | 130 | config SPLIT_PTLOCK_CPUS |
132 | int | 131 | int |
133 | default "4096" if ARM && !CPU_CACHE_VIPT | 132 | default "4096" if ARM && !CPU_CACHE_VIPT |
134 | default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT | 133 | default "4096" if PARISC && !PA20 |
135 | default "4096" if ARM26 || SPARC32 || PPC64 | ||
136 | default "4" | 134 | default "4" |
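For reference, the threshold chosen here is consumed at compile time: below it every mm keeps the single mm->page_table_lock, at or above it each page-table page carries its own spinlock in its struct page. A simplified sketch of that consumption (the real macros in include/linux/mm.h of this era differ in detail):

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
/* split ptlocks: the lock lives in the page-table page's struct page */
#define pte_lockptr(mm, pmd)	({ (void)(mm);  &pmd_page(*(pmd))->ptl; })
#else
/* one coarse lock per mm */
#define pte_lockptr(mm, pmd)	({ (void)(pmd); &(mm)->page_table_lock; })
#endif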
diff --git a/mm/fremap.c b/mm/fremap.c
index d862be3bc3e3..f851775e09c2 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -27,24 +27,20 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, | |||
27 | struct page *page = NULL; | 27 | struct page *page = NULL; |
28 | 28 | ||
29 | if (pte_present(pte)) { | 29 | if (pte_present(pte)) { |
30 | unsigned long pfn = pte_pfn(pte); | 30 | flush_cache_page(vma, addr, pte_pfn(pte)); |
31 | flush_cache_page(vma, addr, pfn); | ||
32 | pte = ptep_clear_flush(vma, addr, ptep); | 31 | pte = ptep_clear_flush(vma, addr, ptep); |
33 | if (unlikely(!pfn_valid(pfn))) { | 32 | page = vm_normal_page(vma, addr, pte); |
34 | print_bad_pte(vma, pte, addr); | 33 | if (page) { |
35 | goto out; | 34 | if (pte_dirty(pte)) |
35 | set_page_dirty(page); | ||
36 | page_remove_rmap(page); | ||
37 | page_cache_release(page); | ||
36 | } | 38 | } |
37 | page = pfn_to_page(pfn); | ||
38 | if (pte_dirty(pte)) | ||
39 | set_page_dirty(page); | ||
40 | page_remove_rmap(page); | ||
41 | page_cache_release(page); | ||
42 | } else { | 39 | } else { |
43 | if (!pte_file(pte)) | 40 | if (!pte_file(pte)) |
44 | free_swap_and_cache(pte_to_swp_entry(pte)); | 41 | free_swap_and_cache(pte_to_swp_entry(pte)); |
45 | pte_clear(mm, addr, ptep); | 42 | pte_clear(mm, addr, ptep); |
46 | } | 43 | } |
47 | out: | ||
48 | return !!page; | 44 | return !!page; |
49 | } | 45 | } |
50 | 46 | ||
@@ -65,8 +61,6 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
65 | pte_t pte_val; | 61 | pte_t pte_val; |
66 | spinlock_t *ptl; | 62 | spinlock_t *ptl; |
67 | 63 | ||
68 | BUG_ON(vma->vm_flags & VM_RESERVED); | ||
69 | |||
70 | pgd = pgd_offset(mm, addr); | 64 | pgd = pgd_offset(mm, addr); |
71 | pud = pud_alloc(mm, pgd, addr); | 65 | pud = pud_alloc(mm, pgd, addr); |
72 | if (!pud) | 66 | if (!pud) |
@@ -122,8 +116,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, | |||
122 | pte_t pte_val; | 116 | pte_t pte_val; |
123 | spinlock_t *ptl; | 117 | spinlock_t *ptl; |
124 | 118 | ||
125 | BUG_ON(vma->vm_flags & VM_RESERVED); | ||
126 | |||
127 | pgd = pgd_offset(mm, addr); | 119 | pgd = pgd_offset(mm, addr); |
128 | pud = pud_alloc(mm, pgd, addr); | 120 | pud = pud_alloc(mm, pgd, addr); |
129 | if (!pud) | 121 | if (!pud) |
@@ -204,12 +196,10 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, | |||
204 | * Make sure the vma is shared, that it supports prefaulting, | 196 | * Make sure the vma is shared, that it supports prefaulting, |
205 | * and that the remapped range is valid and fully within | 197 | * and that the remapped range is valid and fully within |
206 | * the single existing vma. vm_private_data is used as a | 198 | * the single existing vma. vm_private_data is used as a |
207 | * swapout cursor in a VM_NONLINEAR vma (unless VM_RESERVED | 199 | * swapout cursor in a VM_NONLINEAR vma. |
208 | * or VM_LOCKED, but VM_LOCKED could be revoked later on). | ||
209 | */ | 200 | */ |
210 | if (vma && (vma->vm_flags & VM_SHARED) && | 201 | if (vma && (vma->vm_flags & VM_SHARED) && |
211 | (!vma->vm_private_data || | 202 | (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && |
212 | (vma->vm_flags & (VM_NONLINEAR|VM_RESERVED))) && | ||
213 | vma->vm_ops && vma->vm_ops->populate && | 203 | vma->vm_ops && vma->vm_ops->populate && |
214 | end > start && start >= vma->vm_start && | 204 | end > start && start >= vma->vm_start && |
215 | end <= vma->vm_end) { | 205 | end <= vma->vm_end) { |
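For context, the conditions being simplified above are what a caller of remap_file_pages(2) must satisfy: a MAP_SHARED mapping of a file whose vm_ops provide ->populate. A minimal userspace sketch, illustrative only, with error handling trimmed:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

static void *map_nonlinear(int fd, size_t len)
{
	/* sys_remap_file_pages() only accepts shared mappings */
	char *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED)
		return NULL;
	/* rearrange: the first window page now shows file page 1;
	 * prot must be 0 - the vma keeps its existing protections */
	if (remap_file_pages(addr, getpagesize(), 0, 1, 0) < 0)
		return NULL;
	return addr;
}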
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 728e9bda12ea..3e52df7c471b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,6 +22,10 @@ unsigned long max_huge_pages; | |||
22 | static struct list_head hugepage_freelists[MAX_NUMNODES]; | 22 | static struct list_head hugepage_freelists[MAX_NUMNODES]; |
23 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; | 23 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; |
24 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; | 24 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; |
25 | |||
26 | /* | ||
27 | * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages | ||
28 | */ | ||
25 | static DEFINE_SPINLOCK(hugetlb_lock); | 29 | static DEFINE_SPINLOCK(hugetlb_lock); |
26 | 30 | ||
27 | static void enqueue_huge_page(struct page *page) | 31 | static void enqueue_huge_page(struct page *page) |
@@ -61,8 +65,10 @@ static struct page *alloc_fresh_huge_page(void) | |||
61 | HUGETLB_PAGE_ORDER); | 65 | HUGETLB_PAGE_ORDER); |
62 | nid = (nid + 1) % num_online_nodes(); | 66 | nid = (nid + 1) % num_online_nodes(); |
63 | if (page) { | 67 | if (page) { |
68 | spin_lock(&hugetlb_lock); | ||
64 | nr_huge_pages++; | 69 | nr_huge_pages++; |
65 | nr_huge_pages_node[page_to_nid(page)]++; | 70 | nr_huge_pages_node[page_to_nid(page)]++; |
71 | spin_unlock(&hugetlb_lock); | ||
66 | } | 72 | } |
67 | return page; | 73 | return page; |
68 | } | 74 | } |
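The new comment spells out the rule the second hunk enforces: the huge page itself is allocated with no lock held (the allocation can sleep), and only the shared counters are updated under hugetlb_lock. The pattern in isolation (condensed; allocation flags abridged):

page = alloc_pages_node(nid, gfp_mask, HUGETLB_PAGE_ORDER);	/* may sleep: no lock held */
if (page) {
	spin_lock(&hugetlb_lock);		/* hugetlb_lock guards the counters */
	nr_huge_pages++;
	nr_huge_pages_node[page_to_nid(page)]++;
	spin_unlock(&hugetlb_lock);
}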
diff --git a/mm/madvise.c b/mm/madvise.c
index 17aaf3e16449..2b7cf0400a21 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma, | |||
126 | unsigned long start, unsigned long end) | 126 | unsigned long start, unsigned long end) |
127 | { | 127 | { |
128 | *prev = vma; | 128 | *prev = vma; |
129 | if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED)) | 129 | if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) |
130 | return -EINVAL; | 130 | return -EINVAL; |
131 | 131 | ||
132 | if (unlikely(vma->vm_flags & VM_NONLINEAR)) { | 132 | if (unlikely(vma->vm_flags & VM_NONLINEAR)) { |
diff --git a/mm/memory.c b/mm/memory.c
index 2998cfc12f5b..6c1eac92a316 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -333,9 +333,9 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) | |||
333 | } | 333 | } |
334 | 334 | ||
335 | /* | 335 | /* |
336 | * This function is called to print an error when a pte in a | 336 | * This function is called to print an error when a bad pte |
337 | * !VM_RESERVED region is found pointing to an invalid pfn (which | 337 | * is found. For example, we might have a PFN-mapped pte in |
338 | * is an error. | 338 | * a region that doesn't allow it. |
339 | * | 339 | * |
340 | * The calling function must still handle the error. | 340 | * The calling function must still handle the error. |
341 | */ | 341 | */ |
@@ -350,6 +350,59 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr) | |||
350 | } | 350 | } |
351 | 351 | ||
352 | /* | 352 | /* |
353 | * This function gets the "struct page" associated with a pte. | ||
354 | * | ||
355 | * NOTE! Some mappings do not have "struct pages". A raw PFN mapping | ||
356 | * will have each page table entry just pointing to a raw page frame | ||
357 | * number, and as far as the VM layer is concerned, those do not have | ||
358 | * pages associated with them - even if the PFN might point to memory | ||
359 | * that otherwise is perfectly fine and has a "struct page". | ||
360 | * | ||
361 | * The way we recognize those mappings is through the rules set up | ||
362 | * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set, | ||
363 | * and the vm_pgoff will point to the first PFN mapped: thus every | ||
364 | * page that is a raw mapping will always honor the rule | ||
365 | * | ||
366 | * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) | ||
367 | * | ||
368 | * and if that isn't true, the page has been COW'ed (in which case it | ||
369 | * _does_ have a "struct page" associated with it even if it is in a | ||
370 | * VM_PFNMAP range). | ||
371 | */ | ||
372 | struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte) | ||
373 | { | ||
374 | unsigned long pfn = pte_pfn(pte); | ||
375 | |||
376 | if (vma->vm_flags & VM_PFNMAP) { | ||
377 | unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; | ||
378 | if (pfn == vma->vm_pgoff + off) | ||
379 | return NULL; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Add some anal sanity checks for now. Eventually, | ||
384 | * we should just do "return pfn_to_page(pfn)", but | ||
385 | * in the meantime we check that we get a valid pfn, | ||
386 | * and that the resulting page looks ok. | ||
387 | * | ||
388 | * Remove this test eventually! | ||
389 | */ | ||
390 | if (unlikely(!pfn_valid(pfn))) { | ||
391 | print_bad_pte(vma, pte, addr); | ||
392 | return NULL; | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * NOTE! We still have PageReserved() pages in the page | ||
397 | * tables. | ||
398 | * | ||
399 | * The PAGE_ZERO() pages and various VDSO mappings can | ||
400 | * cause them to exist. | ||
401 | */ | ||
402 | return pfn_to_page(pfn); | ||
403 | } | ||
404 | |||
405 | /* | ||
353 | * copy one vm_area from one task to the other. Assumes the page tables | 406 | * copy one vm_area from one task to the other. Assumes the page tables |
354 | * already present in the new task to be cleared in the whole range | 407 | * already present in the new task to be cleared in the whole range |
355 | * covered by this vma. | 408 | * covered by this vma. |
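vm_normal_page() becomes the one place that decides whether a pte has a struct page behind it; the remaining hunks in this file (and in fremap.c, mempolicy.c, msync.c, rmap.c) are converted to call it. A minimal sketch of the calling pattern, illustrative rather than lifted from the patch:

static void touch_one_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	pte_t pte = *ptep;
	struct page *page;

	if (!pte_present(pte))
		return;
	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return;		/* raw PFN mapping: nothing to refcount or rmap */
	if (pte_dirty(pte))
		set_page_dirty(page);
	/* ... rmap/LRU work on "page" goes here ... */
}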
@@ -363,7 +416,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
363 | unsigned long vm_flags = vma->vm_flags; | 416 | unsigned long vm_flags = vma->vm_flags; |
364 | pte_t pte = *src_pte; | 417 | pte_t pte = *src_pte; |
365 | struct page *page; | 418 | struct page *page; |
366 | unsigned long pfn; | ||
367 | 419 | ||
368 | /* pte contains position in swap or file, so copy. */ | 420 | /* pte contains position in swap or file, so copy. */ |
369 | if (unlikely(!pte_present(pte))) { | 421 | if (unlikely(!pte_present(pte))) { |
@@ -381,23 +433,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
381 | goto out_set_pte; | 433 | goto out_set_pte; |
382 | } | 434 | } |
383 | 435 | ||
384 | /* If the region is VM_RESERVED, the mapping is not | ||
385 | * mapped via rmap - duplicate the pte as is. | ||
386 | */ | ||
387 | if (vm_flags & VM_RESERVED) | ||
388 | goto out_set_pte; | ||
389 | |||
390 | pfn = pte_pfn(pte); | ||
391 | /* If the pte points outside of valid memory but | ||
392 | * the region is not VM_RESERVED, we have a problem. | ||
393 | */ | ||
394 | if (unlikely(!pfn_valid(pfn))) { | ||
395 | print_bad_pte(vma, pte, addr); | ||
396 | goto out_set_pte; /* try to do something sane */ | ||
397 | } | ||
398 | |||
399 | page = pfn_to_page(pfn); | ||
400 | |||
401 | /* | 436 | /* |
402 | * If it's a COW mapping, write protect it both | 437 | * If it's a COW mapping, write protect it both |
403 | * in the parent and the child | 438 | * in the parent and the child |
@@ -414,9 +449,13 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
414 | if (vm_flags & VM_SHARED) | 449 | if (vm_flags & VM_SHARED) |
415 | pte = pte_mkclean(pte); | 450 | pte = pte_mkclean(pte); |
416 | pte = pte_mkold(pte); | 451 | pte = pte_mkold(pte); |
417 | get_page(page); | 452 | |
418 | page_dup_rmap(page); | 453 | page = vm_normal_page(vma, addr, pte); |
419 | rss[!!PageAnon(page)]++; | 454 | if (page) { |
455 | get_page(page); | ||
456 | page_dup_rmap(page); | ||
457 | rss[!!PageAnon(page)]++; | ||
458 | } | ||
420 | 459 | ||
421 | out_set_pte: | 460 | out_set_pte: |
422 | set_pte_at(dst_mm, addr, dst_pte, pte); | 461 | set_pte_at(dst_mm, addr, dst_pte, pte); |
@@ -528,7 +567,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
528 | * readonly mappings. The tradeoff is that copy_page_range is more | 567 | * readonly mappings. The tradeoff is that copy_page_range is more |
529 | * efficient than faulting. | 568 | * efficient than faulting. |
530 | */ | 569 | */ |
531 | if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { | 570 | if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) { |
532 | if (!vma->anon_vma) | 571 | if (!vma->anon_vma) |
533 | return 0; | 572 | return 0; |
534 | } | 573 | } |
@@ -568,17 +607,11 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, | |||
568 | continue; | 607 | continue; |
569 | } | 608 | } |
570 | if (pte_present(ptent)) { | 609 | if (pte_present(ptent)) { |
571 | struct page *page = NULL; | 610 | struct page *page; |
572 | 611 | ||
573 | (*zap_work) -= PAGE_SIZE; | 612 | (*zap_work) -= PAGE_SIZE; |
574 | 613 | ||
575 | if (!(vma->vm_flags & VM_RESERVED)) { | 614 | page = vm_normal_page(vma, addr, ptent); |
576 | unsigned long pfn = pte_pfn(ptent); | ||
577 | if (unlikely(!pfn_valid(pfn))) | ||
578 | print_bad_pte(vma, ptent, addr); | ||
579 | else | ||
580 | page = pfn_to_page(pfn); | ||
581 | } | ||
582 | if (unlikely(details) && page) { | 615 | if (unlikely(details) && page) { |
583 | /* | 616 | /* |
584 | * unmap_shared_mapping_pages() wants to | 617 | * unmap_shared_mapping_pages() wants to |
@@ -834,7 +867,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, | |||
834 | /* | 867 | /* |
835 | * Do a quick page-table lookup for a single page. | 868 | * Do a quick page-table lookup for a single page. |
836 | */ | 869 | */ |
837 | struct page *follow_page(struct mm_struct *mm, unsigned long address, | 870 | struct page *follow_page(struct vm_area_struct *vma, unsigned long address, |
838 | unsigned int flags) | 871 | unsigned int flags) |
839 | { | 872 | { |
840 | pgd_t *pgd; | 873 | pgd_t *pgd; |
@@ -842,8 +875,8 @@ struct page *follow_page(struct mm_struct *mm, unsigned long address, | |||
842 | pmd_t *pmd; | 875 | pmd_t *pmd; |
843 | pte_t *ptep, pte; | 876 | pte_t *ptep, pte; |
844 | spinlock_t *ptl; | 877 | spinlock_t *ptl; |
845 | unsigned long pfn; | ||
846 | struct page *page; | 878 | struct page *page; |
879 | struct mm_struct *mm = vma->vm_mm; | ||
847 | 880 | ||
848 | page = follow_huge_addr(mm, address, flags & FOLL_WRITE); | 881 | page = follow_huge_addr(mm, address, flags & FOLL_WRITE); |
849 | if (!IS_ERR(page)) { | 882 | if (!IS_ERR(page)) { |
@@ -879,11 +912,10 @@ struct page *follow_page(struct mm_struct *mm, unsigned long address, | |||
879 | goto unlock; | 912 | goto unlock; |
880 | if ((flags & FOLL_WRITE) && !pte_write(pte)) | 913 | if ((flags & FOLL_WRITE) && !pte_write(pte)) |
881 | goto unlock; | 914 | goto unlock; |
882 | pfn = pte_pfn(pte); | 915 | page = vm_normal_page(vma, address, pte); |
883 | if (!pfn_valid(pfn)) | 916 | if (unlikely(!page)) |
884 | goto unlock; | 917 | goto unlock; |
885 | 918 | ||
886 | page = pfn_to_page(pfn); | ||
887 | if (flags & FOLL_GET) | 919 | if (flags & FOLL_GET) |
888 | get_page(page); | 920 | get_page(page); |
889 | if (flags & FOLL_TOUCH) { | 921 | if (flags & FOLL_TOUCH) { |
@@ -956,8 +988,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
956 | return i ? : -EFAULT; | 988 | return i ? : -EFAULT; |
957 | } | 989 | } |
958 | if (pages) { | 990 | if (pages) { |
959 | pages[i] = pte_page(*pte); | 991 | struct page *page = vm_normal_page(gate_vma, start, *pte); |
960 | get_page(pages[i]); | 992 | pages[i] = page; |
993 | if (page) | ||
994 | get_page(page); | ||
961 | } | 995 | } |
962 | pte_unmap(pte); | 996 | pte_unmap(pte); |
963 | if (vmas) | 997 | if (vmas) |
@@ -968,7 +1002,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
968 | continue; | 1002 | continue; |
969 | } | 1003 | } |
970 | 1004 | ||
971 | if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED)) | 1005 | if (!vma || (vma->vm_flags & VM_IO) |
972 | || !(vm_flags & vma->vm_flags)) | 1006 | || !(vm_flags & vma->vm_flags)) |
973 | return i ? : -EFAULT; | 1007 | return i ? : -EFAULT; |
974 | 1008 | ||
@@ -992,7 +1026,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
992 | foll_flags |= FOLL_WRITE; | 1026 | foll_flags |= FOLL_WRITE; |
993 | 1027 | ||
994 | cond_resched(); | 1028 | cond_resched(); |
995 | while (!(page = follow_page(mm, start, foll_flags))) { | 1029 | while (!(page = follow_page(vma, start, foll_flags))) { |
996 | int ret; | 1030 | int ret; |
997 | ret = __handle_mm_fault(mm, vma, start, | 1031 | ret = __handle_mm_fault(mm, vma, start, |
998 | foll_flags & FOLL_WRITE); | 1032 | foll_flags & FOLL_WRITE); |
@@ -1191,10 +1225,17 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, | |||
1191 | * rest of the world about it: | 1225 | * rest of the world about it: |
1192 | * VM_IO tells people not to look at these pages | 1226 | * VM_IO tells people not to look at these pages |
1193 | * (accesses can have side effects). | 1227 | * (accesses can have side effects). |
1194 | * VM_RESERVED tells the core MM not to "manage" these pages | 1228 | * VM_RESERVED is specified all over the place, because |
1195 | * (e.g. refcount, mapcount, try to swap them out). | 1229 | * in 2.4 it kept swapout's vma scan off this vma; but |
1230 | * in 2.6 the LRU scan won't even find its pages, so this | ||
1231 | * flag means no more than count its pages in reserved_vm, | ||
1232 | * and omit it from core dump, even when VM_IO turned off. | ||
1233 | * VM_PFNMAP tells the core MM that the base pages are just | ||
1234 | * raw PFN mappings, and do not have a "struct page" associated | ||
1235 | * with them. | ||
1196 | */ | 1236 | */ |
1197 | vma->vm_flags |= VM_IO | VM_RESERVED; | 1237 | vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; |
1238 | vma->vm_pgoff = pfn; | ||
1198 | 1239 | ||
1199 | BUG_ON(addr >= end); | 1240 | BUG_ON(addr >= end); |
1200 | pfn -= addr >> PAGE_SHIFT; | 1241 | pfn -= addr >> PAGE_SHIFT; |
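Because remap_pfn_range() now records the first mapped PFN in vm_pgoff and sets VM_PFNMAP itself, driver code does not need to change; a typical mmap handler still looks roughly like this (names hypothetical, error handling trimmed):

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct mydrv *drv = file->private_data;
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = (drv->phys_base >> PAGE_SHIFT) + vma->vm_pgoff;

	/* remap_pfn_range() sets VM_IO|VM_RESERVED|VM_PFNMAP and vm_pgoff for us */
	return remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot);
}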
@@ -1249,6 +1290,26 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) | |||
1249 | return pte; | 1290 | return pte; |
1250 | } | 1291 | } |
1251 | 1292 | ||
1293 | static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va) | ||
1294 | { | ||
1295 | /* | ||
1296 | * If the source page was a PFN mapping, we don't have | ||
1297 | * a "struct page" for it. We do a best-effort copy by | ||
1298 | * just copying from the original user address. If that | ||
1299 | * fails, we just zero-fill it. Live with it. | ||
1300 | */ | ||
1301 | if (unlikely(!src)) { | ||
1302 | void *kaddr = kmap_atomic(dst, KM_USER0); | ||
1303 | unsigned long left = __copy_from_user_inatomic(kaddr, (void __user *)va, PAGE_SIZE); | ||
1304 | if (left) | ||
1305 | memset(kaddr, 0, PAGE_SIZE); | ||
1306 | kunmap_atomic(kaddr, KM_USER0); | ||
1307 | return; | ||
1308 | |||
1309 | } | ||
1310 | copy_user_highpage(dst, src, va); | ||
1311 | } | ||
1312 | |||
1252 | /* | 1313 | /* |
1253 | * This routine handles present pages, when users try to write | 1314 | * This routine handles present pages, when users try to write |
1254 | * to a shared page. It is done by copying the page to a new address | 1315 | * to a shared page. It is done by copying the page to a new address |
@@ -1271,22 +1332,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1271 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 1332 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
1272 | spinlock_t *ptl, pte_t orig_pte) | 1333 | spinlock_t *ptl, pte_t orig_pte) |
1273 | { | 1334 | { |
1274 | struct page *old_page, *new_page; | 1335 | struct page *old_page, *src_page, *new_page; |
1275 | unsigned long pfn = pte_pfn(orig_pte); | ||
1276 | pte_t entry; | 1336 | pte_t entry; |
1277 | int ret = VM_FAULT_MINOR; | 1337 | int ret = VM_FAULT_MINOR; |
1278 | 1338 | ||
1279 | BUG_ON(vma->vm_flags & VM_RESERVED); | 1339 | old_page = vm_normal_page(vma, address, orig_pte); |
1280 | 1340 | src_page = old_page; | |
1281 | if (unlikely(!pfn_valid(pfn))) { | 1341 | if (!old_page) |
1282 | /* | 1342 | goto gotten; |
1283 | * Page table corrupted: show pte and kill process. | ||
1284 | */ | ||
1285 | print_bad_pte(vma, orig_pte, address); | ||
1286 | ret = VM_FAULT_OOM; | ||
1287 | goto unlock; | ||
1288 | } | ||
1289 | old_page = pfn_to_page(pfn); | ||
1290 | 1343 | ||
1291 | if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { | 1344 | if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { |
1292 | int reuse = can_share_swap_page(old_page); | 1345 | int reuse = can_share_swap_page(old_page); |
@@ -1307,11 +1360,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1307 | * Ok, we need to copy. Oh, well.. | 1360 | * Ok, we need to copy. Oh, well.. |
1308 | */ | 1361 | */ |
1309 | page_cache_get(old_page); | 1362 | page_cache_get(old_page); |
1363 | gotten: | ||
1310 | pte_unmap_unlock(page_table, ptl); | 1364 | pte_unmap_unlock(page_table, ptl); |
1311 | 1365 | ||
1312 | if (unlikely(anon_vma_prepare(vma))) | 1366 | if (unlikely(anon_vma_prepare(vma))) |
1313 | goto oom; | 1367 | goto oom; |
1314 | if (old_page == ZERO_PAGE(address)) { | 1368 | if (src_page == ZERO_PAGE(address)) { |
1315 | new_page = alloc_zeroed_user_highpage(vma, address); | 1369 | new_page = alloc_zeroed_user_highpage(vma, address); |
1316 | if (!new_page) | 1370 | if (!new_page) |
1317 | goto oom; | 1371 | goto oom; |
@@ -1319,7 +1373,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1319 | new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); | 1373 | new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); |
1320 | if (!new_page) | 1374 | if (!new_page) |
1321 | goto oom; | 1375 | goto oom; |
1322 | copy_user_highpage(new_page, old_page, address); | 1376 | cow_user_page(new_page, src_page, address); |
1323 | } | 1377 | } |
1324 | 1378 | ||
1325 | /* | 1379 | /* |
@@ -1327,11 +1381,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1327 | */ | 1381 | */ |
1328 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | 1382 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); |
1329 | if (likely(pte_same(*page_table, orig_pte))) { | 1383 | if (likely(pte_same(*page_table, orig_pte))) { |
1330 | page_remove_rmap(old_page); | 1384 | if (old_page) { |
1331 | if (!PageAnon(old_page)) { | 1385 | page_remove_rmap(old_page); |
1386 | if (!PageAnon(old_page)) { | ||
1387 | dec_mm_counter(mm, file_rss); | ||
1388 | inc_mm_counter(mm, anon_rss); | ||
1389 | } | ||
1390 | } else | ||
1332 | inc_mm_counter(mm, anon_rss); | 1391 | inc_mm_counter(mm, anon_rss); |
1333 | dec_mm_counter(mm, file_rss); | ||
1334 | } | ||
1335 | flush_cache_page(vma, address, pfn); | 1392 | flush_cache_page(vma, address, pfn); |
1336 | entry = mk_pte(new_page, vma->vm_page_prot); | 1393 | entry = mk_pte(new_page, vma->vm_page_prot); |
1337 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1394 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
@@ -1345,13 +1402,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1345 | new_page = old_page; | 1402 | new_page = old_page; |
1346 | ret |= VM_FAULT_WRITE; | 1403 | ret |= VM_FAULT_WRITE; |
1347 | } | 1404 | } |
1348 | page_cache_release(new_page); | 1405 | if (new_page) |
1349 | page_cache_release(old_page); | 1406 | page_cache_release(new_page); |
1407 | if (old_page) | ||
1408 | page_cache_release(old_page); | ||
1350 | unlock: | 1409 | unlock: |
1351 | pte_unmap_unlock(page_table, ptl); | 1410 | pte_unmap_unlock(page_table, ptl); |
1352 | return ret; | 1411 | return ret; |
1353 | oom: | 1412 | oom: |
1354 | page_cache_release(old_page); | 1413 | if (old_page) |
1414 | page_cache_release(old_page); | ||
1355 | return VM_FAULT_OOM; | 1415 | return VM_FAULT_OOM; |
1356 | } | 1416 | } |
1357 | 1417 | ||
@@ -1849,7 +1909,6 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1849 | int anon = 0; | 1909 | int anon = 0; |
1850 | 1910 | ||
1851 | pte_unmap(page_table); | 1911 | pte_unmap(page_table); |
1852 | |||
1853 | if (vma->vm_file) { | 1912 | if (vma->vm_file) { |
1854 | mapping = vma->vm_file->f_mapping; | 1913 | mapping = vma->vm_file->f_mapping; |
1855 | sequence = mapping->truncate_count; | 1914 | sequence = mapping->truncate_count; |
@@ -1882,7 +1941,7 @@ retry: | |||
1882 | page = alloc_page_vma(GFP_HIGHUSER, vma, address); | 1941 | page = alloc_page_vma(GFP_HIGHUSER, vma, address); |
1883 | if (!page) | 1942 | if (!page) |
1884 | goto oom; | 1943 | goto oom; |
1885 | copy_user_highpage(page, new_page, address); | 1944 | cow_user_page(page, new_page, address); |
1886 | page_cache_release(new_page); | 1945 | page_cache_release(new_page); |
1887 | new_page = page; | 1946 | new_page = page; |
1888 | anon = 1; | 1947 | anon = 1; |
@@ -1924,7 +1983,7 @@ retry: | |||
1924 | inc_mm_counter(mm, anon_rss); | 1983 | inc_mm_counter(mm, anon_rss); |
1925 | lru_cache_add_active(new_page); | 1984 | lru_cache_add_active(new_page); |
1926 | page_add_anon_rmap(new_page, vma, address); | 1985 | page_add_anon_rmap(new_page, vma, address); |
1927 | } else if (!(vma->vm_flags & VM_RESERVED)) { | 1986 | } else { |
1928 | inc_mm_counter(mm, file_rss); | 1987 | inc_mm_counter(mm, file_rss); |
1929 | page_add_file_rmap(new_page); | 1988 | page_add_file_rmap(new_page); |
1930 | } | 1989 | } |
@@ -2101,6 +2160,12 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) | |||
2101 | spin_unlock(&mm->page_table_lock); | 2160 | spin_unlock(&mm->page_table_lock); |
2102 | return 0; | 2161 | return 0; |
2103 | } | 2162 | } |
2163 | #else | ||
2164 | /* Workaround for gcc 2.96 */ | ||
2165 | int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) | ||
2166 | { | ||
2167 | return 0; | ||
2168 | } | ||
2104 | #endif /* __PAGETABLE_PUD_FOLDED */ | 2169 | #endif /* __PAGETABLE_PUD_FOLDED */ |
2105 | 2170 | ||
2106 | #ifndef __PAGETABLE_PMD_FOLDED | 2171 | #ifndef __PAGETABLE_PMD_FOLDED |
@@ -2129,6 +2194,12 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) | |||
2129 | spin_unlock(&mm->page_table_lock); | 2194 | spin_unlock(&mm->page_table_lock); |
2130 | return 0; | 2195 | return 0; |
2131 | } | 2196 | } |
2197 | #else | ||
2198 | /* Workaround for gcc 2.96 */ | ||
2199 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) | ||
2200 | { | ||
2201 | return 0; | ||
2202 | } | ||
2132 | #endif /* __PAGETABLE_PMD_FOLDED */ | 2203 | #endif /* __PAGETABLE_PMD_FOLDED */ |
2133 | 2204 | ||
2134 | int make_pages_present(unsigned long addr, unsigned long end) | 2205 | int make_pages_present(unsigned long addr, unsigned long end) |
@@ -2203,7 +2274,7 @@ static int __init gate_vma_init(void) | |||
2203 | gate_vma.vm_start = FIXADDR_USER_START; | 2274 | gate_vma.vm_start = FIXADDR_USER_START; |
2204 | gate_vma.vm_end = FIXADDR_USER_END; | 2275 | gate_vma.vm_end = FIXADDR_USER_END; |
2205 | gate_vma.vm_page_prot = PAGE_READONLY; | 2276 | gate_vma.vm_page_prot = PAGE_READONLY; |
2206 | gate_vma.vm_flags = VM_RESERVED; | 2277 | gate_vma.vm_flags = 0; |
2207 | return 0; | 2278 | return 0; |
2208 | } | 2279 | } |
2209 | __initcall(gate_vma_init); | 2280 | __initcall(gate_vma_init); |
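Note that follow_page() now takes the vma rather than the mm, so vm_normal_page() can be applied to what it finds; the change at call sites is mechanical, e.g. (illustrative):

	page = follow_page(mm, start, foll_flags);	/* before */
	page = follow_page(vma, start, foll_flags);	/* after  */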
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5abc57c2b8bd..bec88c81244e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -189,17 +189,15 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
189 | 189 | ||
190 | orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 190 | orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
191 | do { | 191 | do { |
192 | unsigned long pfn; | 192 | struct page *page; |
193 | unsigned int nid; | 193 | unsigned int nid; |
194 | 194 | ||
195 | if (!pte_present(*pte)) | 195 | if (!pte_present(*pte)) |
196 | continue; | 196 | continue; |
197 | pfn = pte_pfn(*pte); | 197 | page = vm_normal_page(vma, addr, *pte); |
198 | if (!pfn_valid(pfn)) { | 198 | if (!page) |
199 | print_bad_pte(vma, *pte, addr); | ||
200 | continue; | 199 | continue; |
201 | } | 200 | nid = page_to_nid(page); |
202 | nid = pfn_to_nid(pfn); | ||
203 | if (!node_isset(nid, *nodes)) | 201 | if (!node_isset(nid, *nodes)) |
204 | break; | 202 | break; |
205 | } while (pte++, addr += PAGE_SIZE, addr != end); | 203 | } while (pte++, addr += PAGE_SIZE, addr != end); |
@@ -269,8 +267,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
269 | first = find_vma(mm, start); | 267 | first = find_vma(mm, start); |
270 | if (!first) | 268 | if (!first) |
271 | return ERR_PTR(-EFAULT); | 269 | return ERR_PTR(-EFAULT); |
272 | if (first->vm_flags & VM_RESERVED) | ||
273 | return ERR_PTR(-EACCES); | ||
274 | prev = NULL; | 270 | prev = NULL; |
275 | for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { | 271 | for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { |
276 | if (!vma->vm_next && vma->vm_end < end) | 272 | if (!vma->vm_next && vma->vm_end < end) |
diff --git a/mm/mmap.c b/mm/mmap.c
@@ -1076,17 +1076,6 @@ munmap_back:
1076 | error = file->f_op->mmap(file, vma); | 1076 | error = file->f_op->mmap(file, vma); |
1077 | if (error) | 1077 | if (error) |
1078 | goto unmap_and_free_vma; | 1078 | goto unmap_and_free_vma; |
1079 | if ((vma->vm_flags & (VM_SHARED | VM_WRITE | VM_RESERVED)) | ||
1080 | == (VM_WRITE | VM_RESERVED)) { | ||
1081 | printk(KERN_WARNING "program %s is using MAP_PRIVATE, " | ||
1082 | "PROT_WRITE mmap of VM_RESERVED memory, which " | ||
1083 | "is deprecated. Please report this to " | ||
1084 | "linux-kernel@vger.kernel.org\n",current->comm); | ||
1085 | if (vma->vm_ops && vma->vm_ops->close) | ||
1086 | vma->vm_ops->close(vma); | ||
1087 | error = -EACCES; | ||
1088 | goto unmap_and_free_vma; | ||
1089 | } | ||
1090 | } else if (vm_flags & VM_SHARED) { | 1079 | } else if (vm_flags & VM_SHARED) { |
1091 | error = shmem_zero_setup(vma); | 1080 | error = shmem_zero_setup(vma); |
1092 | if (error) | 1081 | if (error) |
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 17a2b52b753b..653b8571c1ed 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -124,14 +124,6 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | |||
124 | * a MAP_NORESERVE private mapping to writable will now reserve. | 124 | * a MAP_NORESERVE private mapping to writable will now reserve. |
125 | */ | 125 | */ |
126 | if (newflags & VM_WRITE) { | 126 | if (newflags & VM_WRITE) { |
127 | if (oldflags & VM_RESERVED) { | ||
128 | BUG_ON(oldflags & VM_WRITE); | ||
129 | printk(KERN_WARNING "program %s is using MAP_PRIVATE, " | ||
130 | "PROT_WRITE mprotect of VM_RESERVED memory, " | ||
131 | "which is deprecated. Please report this to " | ||
132 | "linux-kernel@vger.kernel.org\n",current->comm); | ||
133 | return -EACCES; | ||
134 | } | ||
135 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { | 127 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { |
136 | charged = nrpages; | 128 | charged = nrpages; |
137 | if (security_vm_enough_memory(charged)) | 129 | if (security_vm_enough_memory(charged)) |
diff --git a/mm/msync.c b/mm/msync.c
index 0e040e9c39d8..1b5b6f662dcf 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -27,7 +27,6 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
27 | again: | 27 | again: |
28 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 28 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
29 | do { | 29 | do { |
30 | unsigned long pfn; | ||
31 | struct page *page; | 30 | struct page *page; |
32 | 31 | ||
33 | if (progress >= 64) { | 32 | if (progress >= 64) { |
@@ -40,13 +39,9 @@ again: | |||
40 | continue; | 39 | continue; |
41 | if (!pte_maybe_dirty(*pte)) | 40 | if (!pte_maybe_dirty(*pte)) |
42 | continue; | 41 | continue; |
43 | pfn = pte_pfn(*pte); | 42 | page = vm_normal_page(vma, addr, *pte); |
44 | if (unlikely(!pfn_valid(pfn))) { | 43 | if (!page) |
45 | print_bad_pte(vma, *pte, addr); | ||
46 | continue; | 44 | continue; |
47 | } | ||
48 | page = pfn_to_page(pfn); | ||
49 | |||
50 | if (ptep_clear_flush_dirty(vma, addr, pte) || | 45 | if (ptep_clear_flush_dirty(vma, addr, pte) || |
51 | page_test_and_clear_dirty(page)) | 46 | page_test_and_clear_dirty(page)) |
52 | set_page_dirty(page); | 47 | set_page_dirty(page); |
@@ -97,9 +92,8 @@ static void msync_page_range(struct vm_area_struct *vma, | |||
97 | /* For hugepages we can't go walking the page table normally, | 92 | /* For hugepages we can't go walking the page table normally, |
98 | * but that's ok, hugetlbfs is memory based, so we don't need | 93 | * but that's ok, hugetlbfs is memory based, so we don't need |
99 | * to do anything more on an msync(). | 94 | * to do anything more on an msync(). |
100 | * Can't do anything with VM_RESERVED regions either. | ||
101 | */ | 95 | */ |
102 | if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED)) | 96 | if (vma->vm_flags & VM_HUGETLB) |
103 | return; | 97 | return; |
104 | 98 | ||
105 | BUG_ON(addr >= end); | 99 | BUG_ON(addr >= end); |
diff --git a/mm/nommu.c b/mm/nommu.c
index 6deb6ab3d6ad..c1196812876b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1045,7 +1045,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) | |||
1045 | 1045 | ||
1046 | EXPORT_SYMBOL(find_vma); | 1046 | EXPORT_SYMBOL(find_vma); |
1047 | 1047 | ||
1048 | struct page *follow_page(struct mm_struct *mm, unsigned long address, | 1048 | struct page *follow_page(struct vm_area_struct *vma, unsigned long address, |
1049 | unsigned int foll_flags) | 1049 | unsigned int foll_flags) |
1050 | { | 1050 | { |
1051 | return NULL; | 1051 | return NULL; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd4de592dc23..b257720edfc8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -140,18 +140,13 @@ static void bad_page(const char *function, struct page *page) | |||
140 | 1 << PG_reclaim | | 140 | 1 << PG_reclaim | |
141 | 1 << PG_slab | | 141 | 1 << PG_slab | |
142 | 1 << PG_swapcache | | 142 | 1 << PG_swapcache | |
143 | 1 << PG_writeback | | 143 | 1 << PG_writeback ); |
144 | 1 << PG_reserved ); | ||
145 | set_page_count(page, 0); | 144 | set_page_count(page, 0); |
146 | reset_page_mapcount(page); | 145 | reset_page_mapcount(page); |
147 | page->mapping = NULL; | 146 | page->mapping = NULL; |
148 | add_taint(TAINT_BAD_PAGE); | 147 | add_taint(TAINT_BAD_PAGE); |
149 | } | 148 | } |
150 | 149 | ||
151 | #ifndef CONFIG_HUGETLB_PAGE | ||
152 | #define prep_compound_page(page, order) do { } while (0) | ||
153 | #define destroy_compound_page(page, order) do { } while (0) | ||
154 | #else | ||
155 | /* | 150 | /* |
156 | * Higher-order pages are called "compound pages". They are structured thusly: | 151 | * Higher-order pages are called "compound pages". They are structured thusly: |
157 | * | 152 | * |
@@ -205,7 +200,6 @@ static void destroy_compound_page(struct page *page, unsigned long order) | |||
205 | ClearPageCompound(p); | 200 | ClearPageCompound(p); |
206 | } | 201 | } |
207 | } | 202 | } |
208 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
209 | 203 | ||
210 | /* | 204 | /* |
211 | * function for dealing with page's order in buddy system. | 205 | * function for dealing with page's order in buddy system. |
@@ -340,7 +334,7 @@ static inline void __free_pages_bulk (struct page *page, | |||
340 | zone->free_area[order].nr_free++; | 334 | zone->free_area[order].nr_free++; |
341 | } | 335 | } |
342 | 336 | ||
343 | static inline void free_pages_check(const char *function, struct page *page) | 337 | static inline int free_pages_check(const char *function, struct page *page) |
344 | { | 338 | { |
345 | if ( page_mapcount(page) || | 339 | if ( page_mapcount(page) || |
346 | page->mapping != NULL || | 340 | page->mapping != NULL || |
@@ -358,6 +352,12 @@ static inline void free_pages_check(const char *function, struct page *page) | |||
358 | bad_page(function, page); | 352 | bad_page(function, page); |
359 | if (PageDirty(page)) | 353 | if (PageDirty(page)) |
360 | __ClearPageDirty(page); | 354 | __ClearPageDirty(page); |
355 | /* | ||
356 | * For now, we report if PG_reserved was found set, but do not | ||
357 | * clear it, and do not free the page. But we shall soon need | ||
358 | * to do more, for when the ZERO_PAGE count wraps negative. | ||
359 | */ | ||
360 | return PageReserved(page); | ||
361 | } | 361 | } |
362 | 362 | ||
363 | /* | 363 | /* |
@@ -397,11 +397,10 @@ void __free_pages_ok(struct page *page, unsigned int order) | |||
397 | { | 397 | { |
398 | LIST_HEAD(list); | 398 | LIST_HEAD(list); |
399 | int i; | 399 | int i; |
400 | int reserved = 0; | ||
400 | 401 | ||
401 | arch_free_page(page, order); | 402 | arch_free_page(page, order); |
402 | 403 | ||
403 | mod_page_state(pgfree, 1 << order); | ||
404 | |||
405 | #ifndef CONFIG_MMU | 404 | #ifndef CONFIG_MMU |
406 | if (order > 0) | 405 | if (order > 0) |
407 | for (i = 1 ; i < (1 << order) ; ++i) | 406 | for (i = 1 ; i < (1 << order) ; ++i) |
@@ -409,8 +408,12 @@ void __free_pages_ok(struct page *page, unsigned int order) | |||
409 | #endif | 408 | #endif |
410 | 409 | ||
411 | for (i = 0 ; i < (1 << order) ; ++i) | 410 | for (i = 0 ; i < (1 << order) ; ++i) |
412 | free_pages_check(__FUNCTION__, page + i); | 411 | reserved += free_pages_check(__FUNCTION__, page + i); |
412 | if (reserved) | ||
413 | return; | ||
414 | |||
413 | list_add(&page->lru, &list); | 415 | list_add(&page->lru, &list); |
416 | mod_page_state(pgfree, 1 << order); | ||
414 | kernel_map_pages(page, 1<<order, 0); | 417 | kernel_map_pages(page, 1<<order, 0); |
415 | free_pages_bulk(page_zone(page), 1, &list, order); | 418 | free_pages_bulk(page_zone(page), 1, &list, order); |
416 | } | 419 | } |
@@ -468,7 +471,7 @@ void set_page_refs(struct page *page, int order) | |||
468 | /* | 471 | /* |
469 | * This page is about to be returned from the page allocator | 472 | * This page is about to be returned from the page allocator |
470 | */ | 473 | */ |
471 | static void prep_new_page(struct page *page, int order) | 474 | static int prep_new_page(struct page *page, int order) |
472 | { | 475 | { |
473 | if ( page_mapcount(page) || | 476 | if ( page_mapcount(page) || |
474 | page->mapping != NULL || | 477 | page->mapping != NULL || |
@@ -486,12 +489,20 @@ static void prep_new_page(struct page *page, int order) | |||
486 | 1 << PG_reserved ))) | 489 | 1 << PG_reserved ))) |
487 | bad_page(__FUNCTION__, page); | 490 | bad_page(__FUNCTION__, page); |
488 | 491 | ||
492 | /* | ||
493 | * For now, we report if PG_reserved was found set, but do not | ||
494 | * clear it, and do not allocate the page: as a safety net. | ||
495 | */ | ||
496 | if (PageReserved(page)) | ||
497 | return 1; | ||
498 | |||
489 | page->flags &= ~(1 << PG_uptodate | 1 << PG_error | | 499 | page->flags &= ~(1 << PG_uptodate | 1 << PG_error | |
490 | 1 << PG_referenced | 1 << PG_arch_1 | | 500 | 1 << PG_referenced | 1 << PG_arch_1 | |
491 | 1 << PG_checked | 1 << PG_mappedtodisk); | 501 | 1 << PG_checked | 1 << PG_mappedtodisk); |
492 | set_page_private(page, 0); | 502 | set_page_private(page, 0); |
493 | set_page_refs(page, order); | 503 | set_page_refs(page, order); |
494 | kernel_map_pages(page, 1 << order, 1); | 504 | kernel_map_pages(page, 1 << order, 1); |
505 | return 0; | ||
495 | } | 506 | } |
496 | 507 | ||
497 | /* | 508 | /* |
@@ -674,11 +685,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) | |||
674 | 685 | ||
675 | arch_free_page(page, 0); | 686 | arch_free_page(page, 0); |
676 | 687 | ||
677 | kernel_map_pages(page, 1, 0); | ||
678 | inc_page_state(pgfree); | ||
679 | if (PageAnon(page)) | 688 | if (PageAnon(page)) |
680 | page->mapping = NULL; | 689 | page->mapping = NULL; |
681 | free_pages_check(__FUNCTION__, page); | 690 | if (free_pages_check(__FUNCTION__, page)) |
691 | return; | ||
692 | |||
693 | inc_page_state(pgfree); | ||
694 | kernel_map_pages(page, 1, 0); | ||
695 | |||
682 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; | 696 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; |
683 | local_irq_save(flags); | 697 | local_irq_save(flags); |
684 | list_add(&page->lru, &pcp->list); | 698 | list_add(&page->lru, &pcp->list); |
@@ -717,12 +731,14 @@ static struct page * | |||
717 | buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) | 731 | buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) |
718 | { | 732 | { |
719 | unsigned long flags; | 733 | unsigned long flags; |
720 | struct page *page = NULL; | 734 | struct page *page; |
721 | int cold = !!(gfp_flags & __GFP_COLD); | 735 | int cold = !!(gfp_flags & __GFP_COLD); |
722 | 736 | ||
737 | again: | ||
723 | if (order == 0) { | 738 | if (order == 0) { |
724 | struct per_cpu_pages *pcp; | 739 | struct per_cpu_pages *pcp; |
725 | 740 | ||
741 | page = NULL; | ||
726 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; | 742 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; |
727 | local_irq_save(flags); | 743 | local_irq_save(flags); |
728 | if (pcp->count <= pcp->low) | 744 | if (pcp->count <= pcp->low) |
@@ -744,7 +760,8 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) | |||
744 | if (page != NULL) { | 760 | if (page != NULL) { |
745 | BUG_ON(bad_range(zone, page)); | 761 | BUG_ON(bad_range(zone, page)); |
746 | mod_page_state_zone(zone, pgalloc, 1 << order); | 762 | mod_page_state_zone(zone, pgalloc, 1 << order); |
747 | prep_new_page(page, order); | 763 | if (prep_new_page(page, order)) |
764 | goto again; | ||
748 | 765 | ||
749 | if (gfp_flags & __GFP_ZERO) | 766 | if (gfp_flags & __GFP_ZERO) |
750 | prep_zero_page(page, order, gfp_flags); | 767 | prep_zero_page(page, order, gfp_flags); |
@@ -756,9 +773,12 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) | |||
756 | } | 773 | } |
757 | 774 | ||
758 | #define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ | 775 | #define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ |
759 | #define ALLOC_HARDER 0x02 /* try to alloc harder */ | 776 | #define ALLOC_WMARK_MIN 0x02 /* use pages_min watermark */ |
760 | #define ALLOC_HIGH 0x04 /* __GFP_HIGH set */ | 777 | #define ALLOC_WMARK_LOW 0x04 /* use pages_low watermark */ |
761 | #define ALLOC_CPUSET 0x08 /* check for correct cpuset */ | 778 | #define ALLOC_WMARK_HIGH 0x08 /* use pages_high watermark */ |
779 | #define ALLOC_HARDER 0x10 /* try to alloc harder */ | ||
780 | #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ | ||
781 | #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ | ||
762 | 782 | ||
763 | /* | 783 | /* |
764 | * Return 1 if free pages are above 'mark'. This takes into account the order | 784 | * Return 1 if free pages are above 'mark'. This takes into account the order |
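The old code always tested pages_low inside get_page_from_freelist(); callers now say which watermark they mean via these flags. The selection added in the next hunk amounts to this illustrative helper (not literal patch code):

static unsigned long wmark_for(struct zone *z, int alloc_flags)
{
	if (alloc_flags & ALLOC_WMARK_MIN)
		return z->pages_min;
	if (alloc_flags & ALLOC_WMARK_LOW)
		return z->pages_low;
	return z->pages_high;	/* ALLOC_WMARK_HIGH */
}

As the later hunks show, the first optimistic pass asks for ALLOC_WMARK_LOW, the retry after waking kswapd drops to ALLOC_WMARK_MIN, and the last-ditch attempt under heavy pressure uses ALLOC_WMARK_HIGH.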
@@ -813,7 +833,14 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, | |||
813 | continue; | 833 | continue; |
814 | 834 | ||
815 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { | 835 | if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { |
816 | if (!zone_watermark_ok(*z, order, (*z)->pages_low, | 836 | unsigned long mark; |
837 | if (alloc_flags & ALLOC_WMARK_MIN) | ||
838 | mark = (*z)->pages_min; | ||
839 | else if (alloc_flags & ALLOC_WMARK_LOW) | ||
840 | mark = (*z)->pages_low; | ||
841 | else | ||
842 | mark = (*z)->pages_high; | ||
843 | if (!zone_watermark_ok(*z, order, mark, | ||
817 | classzone_idx, alloc_flags)) | 844 | classzone_idx, alloc_flags)) |
818 | continue; | 845 | continue; |
819 | } | 846 | } |
@@ -854,7 +881,7 @@ restart: | |||
854 | } | 881 | } |
855 | 882 | ||
856 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, | 883 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, |
857 | zonelist, ALLOC_CPUSET); | 884 | zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET); |
858 | if (page) | 885 | if (page) |
859 | goto got_pg; | 886 | goto got_pg; |
860 | 887 | ||
@@ -871,7 +898,7 @@ restart: | |||
871 | * cannot run direct reclaim, or if the caller has realtime scheduling | 898 | * cannot run direct reclaim, or if the caller has realtime scheduling |
872 | * policy. | 899 | * policy. |
873 | */ | 900 | */ |
874 | alloc_flags = 0; | 901 | alloc_flags = ALLOC_WMARK_MIN; |
875 | if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait) | 902 | if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait) |
876 | alloc_flags |= ALLOC_HARDER; | 903 | alloc_flags |= ALLOC_HARDER; |
877 | if (gfp_mask & __GFP_HIGH) | 904 | if (gfp_mask & __GFP_HIGH) |
@@ -942,7 +969,7 @@ rebalance: | |||
942 | * under heavy pressure. | 969 | * under heavy pressure. |
943 | */ | 970 | */ |
944 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, | 971 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, |
945 | zonelist, ALLOC_CPUSET); | 972 | zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET); |
946 | if (page) | 973 | if (page) |
947 | goto got_pg; | 974 | goto got_pg; |
948 | 975 | ||
diff --git a/mm/rmap.c b/mm/rmap.c
@@ -225,7 +225,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
225 | 225 | ||
226 | /* | 226 | /* |
227 | * At what user virtual address is page expected in vma? checking that the | 227 | * At what user virtual address is page expected in vma? checking that the |
228 | * page matches the vma: currently only used by unuse_process, on anon pages. | 228 | * page matches the vma: currently only used on anon pages, by unuse_vma; |
229 | */ | 229 | */ |
230 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | 230 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) |
231 | { | 231 | { |
@@ -234,7 +234,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | |||
234 | (void *)page->mapping - PAGE_MAPPING_ANON) | 234 | (void *)page->mapping - PAGE_MAPPING_ANON) |
235 | return -EFAULT; | 235 | return -EFAULT; |
236 | } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { | 236 | } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { |
237 | if (vma->vm_file->f_mapping != page->mapping) | 237 | if (!vma->vm_file || |
238 | vma->vm_file->f_mapping != page->mapping) | ||
238 | return -EFAULT; | 239 | return -EFAULT; |
239 | } else | 240 | } else |
240 | return -EFAULT; | 241 | return -EFAULT; |
@@ -289,7 +290,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, | |||
289 | * repeatedly from either page_referenced_anon or page_referenced_file. | 290 | * repeatedly from either page_referenced_anon or page_referenced_file. |
290 | */ | 291 | */ |
291 | static int page_referenced_one(struct page *page, | 292 | static int page_referenced_one(struct page *page, |
292 | struct vm_area_struct *vma, unsigned int *mapcount, int ignore_token) | 293 | struct vm_area_struct *vma, unsigned int *mapcount) |
293 | { | 294 | { |
294 | struct mm_struct *mm = vma->vm_mm; | 295 | struct mm_struct *mm = vma->vm_mm; |
295 | unsigned long address; | 296 | unsigned long address; |
@@ -310,7 +311,7 @@ static int page_referenced_one(struct page *page, | |||
310 | 311 | ||
311 | /* Pretend the page is referenced if the task has the | 312 | /* Pretend the page is referenced if the task has the |
312 | swap token and is in the middle of a page fault. */ | 313 | swap token and is in the middle of a page fault. */ |
313 | if (mm != current->mm && !ignore_token && has_swap_token(mm) && | 314 | if (mm != current->mm && has_swap_token(mm) && |
314 | rwsem_is_locked(&mm->mmap_sem)) | 315 | rwsem_is_locked(&mm->mmap_sem)) |
315 | referenced++; | 316 | referenced++; |
316 | 317 | ||
@@ -320,7 +321,7 @@ out: | |||
320 | return referenced; | 321 | return referenced; |
321 | } | 322 | } |
322 | 323 | ||
323 | static int page_referenced_anon(struct page *page, int ignore_token) | 324 | static int page_referenced_anon(struct page *page) |
324 | { | 325 | { |
325 | unsigned int mapcount; | 326 | unsigned int mapcount; |
326 | struct anon_vma *anon_vma; | 327 | struct anon_vma *anon_vma; |
@@ -333,8 +334,7 @@ static int page_referenced_anon(struct page *page, int ignore_token) | |||
333 | 334 | ||
334 | mapcount = page_mapcount(page); | 335 | mapcount = page_mapcount(page); |
335 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | 336 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { |
336 | referenced += page_referenced_one(page, vma, &mapcount, | 337 | referenced += page_referenced_one(page, vma, &mapcount); |
337 | ignore_token); | ||
338 | if (!mapcount) | 338 | if (!mapcount) |
339 | break; | 339 | break; |
340 | } | 340 | } |
@@ -353,7 +353,7 @@ static int page_referenced_anon(struct page *page, int ignore_token) | |||
353 | * | 353 | * |
354 | * This function is only called from page_referenced for object-based pages. | 354 | * This function is only called from page_referenced for object-based pages. |
355 | */ | 355 | */ |
356 | static int page_referenced_file(struct page *page, int ignore_token) | 356 | static int page_referenced_file(struct page *page) |
357 | { | 357 | { |
358 | unsigned int mapcount; | 358 | unsigned int mapcount; |
359 | struct address_space *mapping = page->mapping; | 359 | struct address_space *mapping = page->mapping; |
@@ -391,8 +391,7 @@ static int page_referenced_file(struct page *page, int ignore_token) | |||
391 | referenced++; | 391 | referenced++; |
392 | break; | 392 | break; |
393 | } | 393 | } |
394 | referenced += page_referenced_one(page, vma, &mapcount, | 394 | referenced += page_referenced_one(page, vma, &mapcount); |
395 | ignore_token); | ||
396 | if (!mapcount) | 395 | if (!mapcount) |
397 | break; | 396 | break; |
398 | } | 397 | } |
@@ -409,13 +408,10 @@ static int page_referenced_file(struct page *page, int ignore_token) | |||
409 | * Quick test_and_clear_referenced for all mappings to a page, | 408 | * Quick test_and_clear_referenced for all mappings to a page, |
410 | * returns the number of ptes which referenced the page. | 409 | * returns the number of ptes which referenced the page. |
411 | */ | 410 | */ |
412 | int page_referenced(struct page *page, int is_locked, int ignore_token) | 411 | int page_referenced(struct page *page, int is_locked) |
413 | { | 412 | { |
414 | int referenced = 0; | 413 | int referenced = 0; |
415 | 414 | ||
416 | if (!swap_token_default_timeout) | ||
417 | ignore_token = 1; | ||
418 | |||
419 | if (page_test_and_clear_young(page)) | 415 | if (page_test_and_clear_young(page)) |
420 | referenced++; | 416 | referenced++; |
421 | 417 | ||
@@ -424,15 +420,14 @@ int page_referenced(struct page *page, int is_locked, int ignore_token) | |||
424 | 420 | ||
425 | if (page_mapped(page) && page->mapping) { | 421 | if (page_mapped(page) && page->mapping) { |
426 | if (PageAnon(page)) | 422 | if (PageAnon(page)) |
427 | referenced += page_referenced_anon(page, ignore_token); | 423 | referenced += page_referenced_anon(page); |
428 | else if (is_locked) | 424 | else if (is_locked) |
429 | referenced += page_referenced_file(page, ignore_token); | 425 | referenced += page_referenced_file(page); |
430 | else if (TestSetPageLocked(page)) | 426 | else if (TestSetPageLocked(page)) |
431 | referenced++; | 427 | referenced++; |
432 | else { | 428 | else { |
433 | if (page->mapping) | 429 | if (page->mapping) |
434 | referenced += page_referenced_file(page, | 430 | referenced += page_referenced_file(page); |
435 | ignore_token); | ||
436 | unlock_page(page); | 431 | unlock_page(page); |
437 | } | 432 | } |
438 | } | 433 | } |
@@ -529,10 +524,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) | |||
529 | * If the page is mlock()d, we cannot swap it out. | 524 | * If the page is mlock()d, we cannot swap it out. |
530 | * If it's recently referenced (perhaps page_referenced | 525 | * If it's recently referenced (perhaps page_referenced |
531 | * skipped over this mm) then we should reactivate it. | 526 | * skipped over this mm) then we should reactivate it. |
532 | * | ||
533 | * Pages belonging to VM_RESERVED regions should not happen here. | ||
534 | */ | 527 | */ |
535 | if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) || | 528 | if ((vma->vm_flags & VM_LOCKED) || |
536 | ptep_clear_flush_young(vma, address, pte)) { | 529 | ptep_clear_flush_young(vma, address, pte)) { |
537 | ret = SWAP_FAIL; | 530 | ret = SWAP_FAIL; |
538 | goto out_unmap; | 531 | goto out_unmap; |
@@ -613,7 +606,6 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
613 | struct page *page; | 606 | struct page *page; |
614 | unsigned long address; | 607 | unsigned long address; |
615 | unsigned long end; | 608 | unsigned long end; |
616 | unsigned long pfn; | ||
617 | 609 | ||
618 | address = (vma->vm_start + cursor) & CLUSTER_MASK; | 610 | address = (vma->vm_start + cursor) & CLUSTER_MASK; |
619 | end = address + CLUSTER_SIZE; | 611 | end = address + CLUSTER_SIZE; |
@@ -642,15 +634,8 @@ static void try_to_unmap_cluster(unsigned long cursor, | |||
642 | for (; address < end; pte++, address += PAGE_SIZE) { | 634 | for (; address < end; pte++, address += PAGE_SIZE) { |
643 | if (!pte_present(*pte)) | 635 | if (!pte_present(*pte)) |
644 | continue; | 636 | continue; |
645 | 637 | page = vm_normal_page(vma, address, *pte); | |
646 | pfn = pte_pfn(*pte); | 638 | BUG_ON(!page || PageAnon(page)); |
647 | if (unlikely(!pfn_valid(pfn))) { | ||
648 | print_bad_pte(vma, *pte, address); | ||
649 | continue; | ||
650 | } | ||
651 | |||
652 | page = pfn_to_page(pfn); | ||
653 | BUG_ON(PageAnon(page)); | ||
654 | 639 | ||
655 | if (ptep_clear_flush_young(vma, address, pte)) | 640 | if (ptep_clear_flush_young(vma, address, pte)) |
656 | continue; | 641 | continue; |
@@ -727,7 +712,7 @@ static int try_to_unmap_file(struct page *page) | |||
727 | 712 | ||
728 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 713 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
729 | shared.vm_set.list) { | 714 | shared.vm_set.list) { |
730 | if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) | 715 | if (vma->vm_flags & VM_LOCKED) |
731 | continue; | 716 | continue; |
732 | cursor = (unsigned long) vma->vm_private_data; | 717 | cursor = (unsigned long) vma->vm_private_data; |
733 | if (cursor > max_nl_cursor) | 718 | if (cursor > max_nl_cursor) |
@@ -761,7 +746,7 @@ static int try_to_unmap_file(struct page *page) | |||
761 | do { | 746 | do { |
762 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 747 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
763 | shared.vm_set.list) { | 748 | shared.vm_set.list) { |
764 | if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) | 749 | if (vma->vm_flags & VM_LOCKED) |
765 | continue; | 750 | continue; |
766 | cursor = (unsigned long) vma->vm_private_data; | 751 | cursor = (unsigned long) vma->vm_private_data; |
767 | while ( cursor < max_nl_cursor && | 752 | while ( cursor < max_nl_cursor && |
@@ -783,11 +768,8 @@ static int try_to_unmap_file(struct page *page) | |||
783 | * in locked vmas). Reset cursor on all unreserved nonlinear | 768 | * in locked vmas). Reset cursor on all unreserved nonlinear |
784 | * vmas, now forgetting on which ones it had fallen behind. | 769 | * vmas, now forgetting on which ones it had fallen behind. |
785 | */ | 770 | */ |
786 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 771 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) |
787 | shared.vm_set.list) { | 772 | vma->vm_private_data = NULL; |
788 | if (!(vma->vm_flags & VM_RESERVED)) | ||
789 | vma->vm_private_data = NULL; | ||
790 | } | ||
791 | out: | 773 | out: |
792 | spin_unlock(&mapping->i_mmap_lock); | 774 | spin_unlock(&mapping->i_mmap_lock); |
793 | return ret; | 775 | return ret; |
diff --git a/mm/swap.c b/mm/swap.c | |||
@@ -34,8 +34,6 @@ | |||
34 | /* How many pages do we try to swap or page in/out together? */ | 34 | /* How many pages do we try to swap or page in/out together? */ |
35 | int page_cluster; | 35 | int page_cluster; |
36 | 36 | ||
37 | #ifdef CONFIG_HUGETLB_PAGE | ||
38 | |||
39 | void put_page(struct page *page) | 37 | void put_page(struct page *page) |
40 | { | 38 | { |
41 | if (unlikely(PageCompound(page))) { | 39 | if (unlikely(PageCompound(page))) { |
@@ -52,7 +50,6 @@ void put_page(struct page *page) | |||
52 | __page_cache_release(page); | 50 | __page_cache_release(page); |
53 | } | 51 | } |
54 | EXPORT_SYMBOL(put_page); | 52 | EXPORT_SYMBOL(put_page); |
55 | #endif | ||
56 | 53 | ||
57 | /* | 54 | /* |
58 | * Writeback is about to end against a page which has been marked for immediate | 55 | * Writeback is about to end against a page which has been marked for immediate |
diff --git a/mm/thrash.c b/mm/thrash.c index eff3c18c33a1..f4c560b4a2b7 100644 --- a/mm/thrash.c +++ b/mm/thrash.c | |||
@@ -57,14 +57,17 @@ void grab_swap_token(void) | |||
57 | /* We have the token. Let others know we still need it. */ | 57 | /* We have the token. Let others know we still need it. */ |
58 | if (has_swap_token(current->mm)) { | 58 | if (has_swap_token(current->mm)) { |
59 | current->mm->recent_pagein = 1; | 59 | current->mm->recent_pagein = 1; |
60 | if (unlikely(!swap_token_default_timeout)) | ||
61 | disable_swap_token(); | ||
60 | return; | 62 | return; |
61 | } | 63 | } |
62 | 64 | ||
63 | if (time_after(jiffies, swap_token_check)) { | 65 | if (time_after(jiffies, swap_token_check)) { |
64 | 66 | ||
65 | /* Can't get swapout protection if we exceed our RSS limit. */ | 67 | if (!swap_token_default_timeout) { |
66 | // if (current->mm->rss > current->mm->rlimit_rss) | 68 | swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; |
67 | // return; | 69 | return; |
70 | } | ||
68 | 71 | ||
69 | /* ... or if we recently held the token. */ | 72 | /* ... or if we recently held the token. */ |
70 | if (time_before(jiffies, current->mm->swap_token_time)) | 73 | if (time_before(jiffies, current->mm->swap_token_time)) |
@@ -95,6 +98,7 @@ void __put_swap_token(struct mm_struct *mm) | |||
95 | { | 98 | { |
96 | spin_lock(&swap_token_lock); | 99 | spin_lock(&swap_token_lock); |
97 | if (likely(mm == swap_token_mm)) { | 100 | if (likely(mm == swap_token_mm)) { |
101 | mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL; | ||
98 | swap_token_mm = &init_mm; | 102 | swap_token_mm = &init_mm; |
99 | swap_token_check = jiffies; | 103 | swap_token_check = jiffies; |
100 | } | 104 | } |
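Taken together, the thrash.c hunks make the holder re-arm its cooldown stamp when it gives the token back, and make both paths back out cleanly when swap_token_default_timeout is 0 (the token is administratively disabled). A hedged user-space model of that grab/release/cooldown shape -- grab_token, put_token, CHECK_INTERVAL, token_timeout and the counter "now" are all invented here, with a plain integer standing in for jiffies:

    #include <stdio.h>

    /* Toy swap-token model: one task at a time keeps its protection,
     * everyone else waits out a cooldown.  Purely illustrative. */

    #define CHECK_INTERVAL 4

    static int now;                  /* stands in for jiffies       */
    static int token_timeout = 8;    /* 0 means "token disabled"    */
    static int token_holder = -1;    /* -1: nobody holds the token  */
    static int next_check;

    struct task { int id; int cooldown_until; };

    static void grab_token(struct task *t)
    {
            if (token_holder == t->id)
                    return;                 /* already ours          */
            if (now < next_check || !token_timeout)
                    return;                 /* too soon, or disabled */
            if (now < t->cooldown_until)
                    return;                 /* we held it recently   */
            token_holder = t->id;
            next_check = now + token_timeout;
    }

    static void put_token(struct task *t)
    {
            if (token_holder != t->id)
                    return;
            t->cooldown_until = now + CHECK_INTERVAL;  /* re-arm cooldown */
            token_holder = -1;
            next_check = now;
    }

    int main(void)
    {
            struct task a = { .id = 1 }, b = { .id = 2 };

            grab_token(&a);
            printf("t=%d holder=%d\n", now, token_holder);
            put_token(&a);
            now += 1;
            grab_token(&a);          /* still cooling down: no token */
            printf("t=%d holder=%d\n", now, token_holder);
            now += CHECK_INTERVAL;
            grab_token(&b);
            printf("t=%d holder=%d\n", now, token_holder);
            return 0;
    }

Running it shows the holder giving the token up, failing to re-acquire it during its own cooldown, and another task picking it up once the interval has passed.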
diff --git a/mm/truncate.c b/mm/truncate.c index 29c18f68dc35..9173ab500604 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -282,8 +282,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
282 | * Zap the rest of the file in one hit. | 282 | * Zap the rest of the file in one hit. |
283 | */ | 283 | */ |
284 | unmap_mapping_range(mapping, | 284 | unmap_mapping_range(mapping, |
285 | page_index << PAGE_CACHE_SHIFT, | 285 | (loff_t)page_index<<PAGE_CACHE_SHIFT, |
286 | (end - page_index + 1) | 286 | (loff_t)(end - page_index + 1) |
287 | << PAGE_CACHE_SHIFT, | 287 | << PAGE_CACHE_SHIFT, |
288 | 0); | 288 | 0); |
289 | did_range_unmap = 1; | 289 | did_range_unmap = 1; |
@@ -292,7 +292,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
292 | * Just zap this page | 292 | * Just zap this page |
293 | */ | 293 | */ |
294 | unmap_mapping_range(mapping, | 294 | unmap_mapping_range(mapping, |
295 | page_index << PAGE_CACHE_SHIFT, | 295 | (loff_t)page_index<<PAGE_CACHE_SHIFT, |
296 | PAGE_CACHE_SIZE, 0); | 296 | PAGE_CACHE_SIZE, 0); |
297 | } | 297 | } |
298 | } | 298 | } |
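Both truncate.c call sites now widen the page index to loff_t before shifting. Without the cast, the shift is done in unsigned long, which on 32-bit kernels silently truncates byte offsets beyond 4 GiB and makes unmap_mapping_range() operate on the wrong range. A self-contained demonstration of the effect, assuming a 12-bit page shift and using uint32_t to force 32-bit arithmetic regardless of the host:

    #include <inttypes.h>
    #include <stdio.h>

    #define PAGE_CACHE_SHIFT 12     /* assume 4 KiB pages for the example */

    int main(void)
    {
            uint32_t page_index = 0x00200000;   /* page 2M => offset 8 GiB */

            /* Shifting in 32-bit arithmetic wraps: the top bits are lost. */
            uint32_t wrong = page_index << PAGE_CACHE_SHIFT;

            /* Widening first (the (loff_t) cast in the patch) keeps them. */
            int64_t right = (int64_t)page_index << PAGE_CACHE_SHIFT;

            printf("32-bit shift: 0x%08" PRIx32 "\n", wrong);
            printf("64-bit shift: 0x%016" PRIx64 "\n", (uint64_t)right);
            return 0;
    }

The 32-bit result comes out as 0, i.e. the start of the file, which is exactly the kind of mis-zap the cast prevents.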
diff --git a/mm/vmscan.c b/mm/vmscan.c index 28130541270f..b0cd81c32de6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -201,13 +201,25 @@ static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |||
201 | list_for_each_entry(shrinker, &shrinker_list, list) { | 201 | list_for_each_entry(shrinker, &shrinker_list, list) { |
202 | unsigned long long delta; | 202 | unsigned long long delta; |
203 | unsigned long total_scan; | 203 | unsigned long total_scan; |
204 | unsigned long max_pass = (*shrinker->shrinker)(0, gfp_mask); | ||
204 | 205 | ||
205 | delta = (4 * scanned) / shrinker->seeks; | 206 | delta = (4 * scanned) / shrinker->seeks; |
206 | delta *= (*shrinker->shrinker)(0, gfp_mask); | 207 | delta *= max_pass; |
207 | do_div(delta, lru_pages + 1); | 208 | do_div(delta, lru_pages + 1); |
208 | shrinker->nr += delta; | 209 | shrinker->nr += delta; |
209 | if (shrinker->nr < 0) | 210 | if (shrinker->nr < 0) { |
210 | shrinker->nr = LONG_MAX; /* It wrapped! */ | 211 | printk(KERN_ERR "%s: nr=%ld\n", |
212 | __FUNCTION__, shrinker->nr); | ||
213 | shrinker->nr = max_pass; | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * Avoid risking looping forever due to too large nr value: | ||
218 | * never try to free more than twice the estimate number of | ||
219 | * freeable entries. | ||
220 | */ | ||
221 | if (shrinker->nr > max_pass * 2) | ||
222 | shrinker->nr = max_pass * 2; | ||
211 | 223 | ||
212 | total_scan = shrinker->nr; | 224 | total_scan = shrinker->nr; |
213 | shrinker->nr = 0; | 225 | shrinker->nr = 0; |
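The reworked shrink_slab() queries each shrinker once up front for max_pass (its current count of freeable objects), resets a negative, wrapped nr back to that estimate instead of LONG_MAX, and finally clamps nr to at most 2 * max_pass so a runaway backlog cannot keep the scan loop busy forever. The arithmetic in isolation, with toy_shrinker_count and update_shrinker_nr invented for this sketch (the real shrinker callback also takes a scan count and a gfp mask):

    #include <stdio.h>

    /* Toy shrinker: pretends the cache holds 1000 freeable objects. */
    static long toy_shrinker_count(void)
    {
            return 1000;
    }

    /* Mirror of the pacing arithmetic: scale work by pages scanned vs.
     * LRU size, repair wraparound, and cap the backlog at 2 * max_pass. */
    static long update_shrinker_nr(long nr, unsigned long scanned,
                                   unsigned long lru_pages, int seeks)
    {
            long max_pass = toy_shrinker_count();
            long long delta = (4LL * scanned) / seeks;

            delta = delta * max_pass / (lru_pages + 1);
            nr += (long)delta;

            if (nr < 0)
                    nr = max_pass;      /* wrapped: fall back to estimate */
            if (nr > max_pass * 2)
                    nr = max_pass * 2;  /* never scan more than twice it  */
            return nr;
    }

    int main(void)
    {
            long nr = 0;

            /* A huge carried-over backlog gets capped, not looped on. */
            nr = update_shrinker_nr(1 << 30, 4096, 16384, 2);
            printf("capped nr = %ld\n", nr);
            return 0;
    }

With the numbers above the cap kicks in and nr comes back as 2000, i.e. twice the shrinker's own estimate of what is freeable.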
@@ -407,7 +419,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) | |||
407 | if (PageWriteback(page)) | 419 | if (PageWriteback(page)) |
408 | goto keep_locked; | 420 | goto keep_locked; |
409 | 421 | ||
410 | referenced = page_referenced(page, 1, sc->priority <= 0); | 422 | referenced = page_referenced(page, 1); |
411 | /* In active use or really unfreeable? Activate it. */ | 423 | /* In active use or really unfreeable? Activate it. */ |
412 | if (referenced && page_mapping_inuse(page)) | 424 | if (referenced && page_mapping_inuse(page)) |
413 | goto activate_locked; | 425 | goto activate_locked; |
@@ -756,7 +768,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) | |||
756 | if (page_mapped(page)) { | 768 | if (page_mapped(page)) { |
757 | if (!reclaim_mapped || | 769 | if (!reclaim_mapped || |
758 | (total_swap_pages == 0 && PageAnon(page)) || | 770 | (total_swap_pages == 0 && PageAnon(page)) || |
759 | page_referenced(page, 0, sc->priority <= 0)) { | 771 | page_referenced(page, 0)) { |
760 | list_add(&page->lru, &l_active); | 772 | list_add(&page->lru, &l_active); |
761 | continue; | 773 | continue; |
762 | } | 774 | } |
@@ -960,6 +972,8 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
960 | sc.nr_reclaimed = 0; | 972 | sc.nr_reclaimed = 0; |
961 | sc.priority = priority; | 973 | sc.priority = priority; |
962 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | 974 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; |
975 | if (!priority) | ||
976 | disable_swap_token(); | ||
963 | shrink_caches(zones, &sc); | 977 | shrink_caches(zones, &sc); |
964 | shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); | 978 | shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); |
965 | if (reclaim_state) { | 979 | if (reclaim_state) { |
@@ -1056,6 +1070,10 @@ loop_again: | |||
1056 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 1070 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
1057 | unsigned long lru_pages = 0; | 1071 | unsigned long lru_pages = 0; |
1058 | 1072 | ||
1073 | /* The swap token gets in the way of swapout... */ | ||
1074 | if (!priority) | ||
1075 | disable_swap_token(); | ||
1076 | |||
1059 | all_zones_ok = 1; | 1077 | all_zones_ok = 1; |
1060 | 1078 | ||
1061 | if (nr_pages == 0) { | 1079 | if (nr_pages == 0) { |
@@ -1360,6 +1378,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1360 | sc.nr_reclaimed = 0; | 1378 | sc.nr_reclaimed = 0; |
1361 | /* scan at the highest priority */ | 1379 | /* scan at the highest priority */ |
1362 | sc.priority = 0; | 1380 | sc.priority = 0; |
1381 | disable_swap_token(); | ||
1363 | 1382 | ||
1364 | if (nr_pages > SWAP_CLUSTER_MAX) | 1383 | if (nr_pages > SWAP_CLUSTER_MAX) |
1365 | sc.swap_cluster_max = nr_pages; | 1384 | sc.swap_cluster_max = nr_pages; |
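The three vmscan.c additions follow one pattern: once reclaim has escalated to priority 0 (its most aggressive pass), disable_swap_token() is called so the mm currently holding the token stops being exempt from swapout. Schematically, with shrink_caches() reduced to a stub and the token reduced to a bool (the real disable_swap_token() releases the token from whichever mm holds it rather than flipping a global flag as this toy does):

    #include <stdbool.h>
    #include <stdio.h>

    #define DEF_PRIORITY 12

    static bool swap_token_enabled = true;

    static void disable_swap_token(void)
    {
            swap_token_enabled = false;
    }

    /* Stub: pretend reclaim only succeeds once the token is out of the way. */
    static bool shrink_caches(int priority)
    {
            printf("priority %2d, token %s\n",
                   priority, swap_token_enabled ? "on" : "off");
            return !swap_token_enabled;
    }

    int main(void)
    {
            int priority;

            for (priority = DEF_PRIORITY; priority >= 0; priority--) {
                    /* The swap token gets in the way of swapout... */
                    if (!priority)
                            disable_swap_token();
                    if (shrink_caches(priority))
                            break;
            }
            return 0;
    }

zone_reclaim() scans at priority 0 only, which is why its hunk drops the token unconditionally before scanning.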