-rw-r--r--	include/linux/ksm.h	2
-rw-r--r--	include/linux/rmap.h	2
-rw-r--r--	mm/ksm.c	60
-rw-r--r--	mm/rmap.c	210
4 files changed, 80 insertions(+), 194 deletions(-)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 91b9719722c3..3be6bb18562d 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -73,8 +73,6 @@ static inline void set_page_stable_node(struct page *page,
 struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address);
 
-int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 2462458708cd..1da693d51255 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -184,7 +184,7 @@ static inline void page_dup_rmap(struct page *page)
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
-			unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+			unsigned long address, void *arg);
 
 #define TTU_ACTION(x)	((x) & TTU_ACTION_MASK)
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 646d45a6b6c8..3df141e5f3e0 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1891,61 +1891,6 @@ struct page *ksm_might_need_to_copy(struct page *page,
 	return new_page;
 }
 
-int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
-			unsigned long *vm_flags)
-{
-	struct stable_node *stable_node;
-	struct rmap_item *rmap_item;
-	unsigned int mapcount = page_mapcount(page);
-	int referenced = 0;
-	int search_new_forks = 0;
-
-	VM_BUG_ON(!PageKsm(page));
-	VM_BUG_ON(!PageLocked(page));
-
-	stable_node = page_stable_node(page);
-	if (!stable_node)
-		return 0;
-again:
-	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
-		struct anon_vma *anon_vma = rmap_item->anon_vma;
-		struct anon_vma_chain *vmac;
-		struct vm_area_struct *vma;
-
-		anon_vma_lock_read(anon_vma);
-		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
-					       0, ULONG_MAX) {
-			vma = vmac->vma;
-			if (rmap_item->address < vma->vm_start ||
-			    rmap_item->address >= vma->vm_end)
-				continue;
-			/*
-			 * Initially we examine only the vma which covers this
-			 * rmap_item; but later, if there is still work to do,
-			 * we examine covering vmas in other mms: in case they
-			 * were forked from the original since ksmd passed.
-			 */
-			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
-				continue;
-
-			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-				continue;
-
-			referenced += page_referenced_one(page, vma,
-					rmap_item->address, &mapcount, vm_flags);
-			if (!search_new_forks || !mapcount)
-				break;
-		}
-		anon_vma_unlock_read(anon_vma);
-		if (!mapcount)
-			goto out;
-	}
-	if (!search_new_forks++)
-		goto again;
-out:
-	return referenced;
-}
-
 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
 {
 	struct stable_node *stable_node;
@@ -1954,6 +1899,11 @@ int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
 	int search_new_forks = 0;
 
 	VM_BUG_ON(!PageKsm(page));
+
+	/*
+	 * Rely on the page lock to protect against concurrent modifications
+	 * to that page's node of the stable tree.
+	 */
 	VM_BUG_ON(!PageLocked(page));
 
 	stable_node = page_stable_node(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index c73e0c645d09..080413036406 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 	return 1;
 }
 
+struct page_referenced_arg {
+	int mapcount;
+	int referenced;
+	unsigned long vm_flags;
+	struct mem_cgroup *memcg;
+};
 /*
- * Subfunctions of page_referenced: page_referenced_one called
- * repeatedly from either page_referenced_anon or page_referenced_file.
+ * arg: page_referenced_arg will be passed
  */
 int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-			unsigned long address, unsigned int *mapcount,
-			unsigned long *vm_flags)
+			unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
 	int referenced = 0;
+	struct page_referenced_arg *pra = arg;
 
 	if (unlikely(PageTransHuge(page))) {
 		pmd_t *pmd;
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		pmd = page_check_address_pmd(page, mm, address,
 					     PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
 		if (!pmd)
-			goto out;
+			return SWAP_AGAIN;
 
 		if (vma->vm_flags & VM_LOCKED) {
 			spin_unlock(ptl);
-			*mapcount = 0;	/* break early from loop */
-			*vm_flags |= VM_LOCKED;
-			goto out;
+			pra->vm_flags |= VM_LOCKED;
+			return SWAP_FAIL; /* To break the loop */
 		}
 
 		/* go ahead even if the pmd is pmd_trans_splitting() */
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		 */
 		pte = page_check_address(page, mm, address, &ptl, 0);
 		if (!pte)
-			goto out;
+			return SWAP_AGAIN;
 
 		if (vma->vm_flags & VM_LOCKED) {
 			pte_unmap_unlock(pte, ptl);
-			*mapcount = 0;	/* break early from loop */
-			*vm_flags |= VM_LOCKED;
-			goto out;
+			pra->vm_flags |= VM_LOCKED;
+			return SWAP_FAIL; /* To break the loop */
 		}
 
 		if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	(*mapcount)--;
-
-	if (referenced)
-		*vm_flags |= vma->vm_flags;
-out:
-	return referenced;
-}
-
-static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *memcg,
-				unsigned long *vm_flags)
-{
-	unsigned int mapcount;
-	struct anon_vma *anon_vma;
-	pgoff_t pgoff;
-	struct anon_vma_chain *avc;
-	int referenced = 0;
-
-	anon_vma = page_lock_anon_vma_read(page);
-	if (!anon_vma)
-		return referenced;
-
-	mapcount = page_mapcount(page);
-	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-		struct vm_area_struct *vma = avc->vma;
-		unsigned long address = vma_address(page, vma);
-		/*
-		 * If we are reclaiming on behalf of a cgroup, skip
-		 * counting on behalf of references from different
-		 * cgroups
-		 */
-		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-			continue;
-		referenced += page_referenced_one(page, vma, address,
-						  &mapcount, vm_flags);
-		if (!mapcount)
-			break;
+	if (referenced) {
+		pra->referenced++;
+		pra->vm_flags |= vma->vm_flags;
 	}
 
-	page_unlock_anon_vma_read(anon_vma);
-	return referenced;
+	pra->mapcount--;
+	if (!pra->mapcount)
+		return SWAP_SUCCESS; /* To break the loop */
+
+	return SWAP_AGAIN;
 }
 
-/**
- * page_referenced_file - referenced check for object-based rmap
- * @page: the page we're checking references on.
- * @memcg: target memory control group
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
- *
- * For an object-based mapped page, find all the places it is mapped and
- * check/clear the referenced flag. This is done by following the page->mapping
- * pointer, then walking the chain of vmas it holds. It returns the number
- * of references it found.
- *
- * This function is only called from page_referenced for object-based pages.
- */
-static int page_referenced_file(struct page *page,
-				struct mem_cgroup *memcg,
-				unsigned long *vm_flags)
+static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
 {
-	unsigned int mapcount;
-	struct address_space *mapping = page->mapping;
-	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-	struct vm_area_struct *vma;
-	int referenced = 0;
-
-	/*
-	 * The caller's checks on page->mapping and !PageAnon have made
-	 * sure that this is a file page: the check for page->mapping
-	 * excludes the case just before it gets set on an anon page.
-	 */
-	BUG_ON(PageAnon(page));
+	struct page_referenced_arg *pra = arg;
+	struct mem_cgroup *memcg = pra->memcg;
 
-	/*
-	 * The page lock not only makes sure that page->mapping cannot
-	 * suddenly be NULLified by truncation, it makes sure that the
-	 * structure at mapping cannot be freed and reused yet,
-	 * so we can safely take mapping->i_mmap_mutex.
-	 */
-	BUG_ON(!PageLocked(page));
-
-	mutex_lock(&mapping->i_mmap_mutex);
-
-	/*
-	 * i_mmap_mutex does not stabilize mapcount at all, but mapcount
-	 * is more likely to be accurate if we note it after spinning.
-	 */
-	mapcount = page_mapcount(page);
-
-	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-		unsigned long address = vma_address(page, vma);
-		/*
-		 * If we are reclaiming on behalf of a cgroup, skip
-		 * counting on behalf of references from different
-		 * cgroups
-		 */
-		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-			continue;
-		referenced += page_referenced_one(page, vma, address,
-						  &mapcount, vm_flags);
-		if (!mapcount)
-			break;
-	}
+	if (!mm_match_cgroup(vma->vm_mm, memcg))
+		return true;
 
-	mutex_unlock(&mapping->i_mmap_mutex);
-	return referenced;
+	return false;
 }
 
 /**
@@ -851,32 +768,47 @@ int page_referenced(struct page *page,
 		    struct mem_cgroup *memcg,
 		    unsigned long *vm_flags)
 {
-	int referenced = 0;
+	int ret;
 	int we_locked = 0;
+	struct page_referenced_arg pra = {
+		.mapcount = page_mapcount(page),
+		.memcg = memcg,
+	};
+	struct rmap_walk_control rwc = {
+		.rmap_one = page_referenced_one,
+		.arg = (void *)&pra,
+		.anon_lock = page_lock_anon_vma_read,
+	};
 
 	*vm_flags = 0;
-	if (page_mapped(page) && page_rmapping(page)) {
-		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
-			we_locked = trylock_page(page);
-			if (!we_locked) {
-				referenced++;
-				goto out;
-			}
-		}
-		if (unlikely(PageKsm(page)))
-			referenced += page_referenced_ksm(page, memcg,
-								vm_flags);
-		else if (PageAnon(page))
-			referenced += page_referenced_anon(page, memcg,
-								vm_flags);
-		else if (page->mapping)
-			referenced += page_referenced_file(page, memcg,
-								vm_flags);
-		if (we_locked)
-			unlock_page(page);
+	if (!page_mapped(page))
+		return 0;
+
+	if (!page_rmapping(page))
+		return 0;
+
+	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+		we_locked = trylock_page(page);
+		if (!we_locked)
+			return 1;
 	}
-out:
-	return referenced;
+
+	/*
+	 * If we are reclaiming on behalf of a cgroup, skip
+	 * counting on behalf of references from different
+	 * cgroups
+	 */
+	if (memcg) {
+		rwc.invalid_vma = invalid_page_referenced_vma;
+	}
+
+	ret = rmap_walk(page, &rwc);
+	*vm_flags = pra.vm_flags;
+
+	if (we_locked)
+		unlock_page(page);
+
+	return pra.referenced;
 }
 
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
@@ -1700,6 +1632,14 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 	struct vm_area_struct *vma;
 	int ret = SWAP_AGAIN;
 
+	/*
+	 * The page lock not only makes sure that page->mapping cannot
+	 * suddenly be NULLified by truncation, it makes sure that the
+	 * structure at mapping cannot be freed and reused yet,
+	 * so we can safely take mapping->i_mmap_mutex.
+	 */
+	VM_BUG_ON(!PageLocked(page));
+
 	if (!mapping)
 		return ret;
 	mutex_lock(&mapping->i_mmap_mutex);
@@ -1731,8 +1671,6 @@ done:
 
 int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
 {
-	VM_BUG_ON(!PageLocked(page));
-
 	if (unlikely(PageKsm(page)))
 		return rmap_walk_ksm(page, rwc);
 	else if (PageAnon(page))
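
For orientation, the control flow that the converted page_referenced() relies on can be sketched outside the kernel. The program below is a simplified user-space model, not the kernel's rmap_walk()/rmap_walk_anon() code: the stand-in walker visits each candidate VMA, optionally skips it via the invalid_vma hook (as invalid_page_referenced_vma does for foreign memcgs), and stops as soon as the rmap_one callback returns something other than SWAP_AGAIN, which is how page_referenced_one's SWAP_SUCCESS and SWAP_FAIL returns break the walk once pra->mapcount hits zero or a VM_LOCKED mapping is hit. All types and the array-based loop here are illustrative assumptions.

/* Simplified user-space model of the rmap_walk() callback contract
 * used by this patch; types and the walk loop are stand-ins, not the
 * kernel implementation. Compile with: cc -std=c99 walk_sketch.c */
#include <stdbool.h>
#include <stdio.h>

#define SWAP_SUCCESS	0
#define SWAP_AGAIN	1
#define SWAP_FAIL	2

struct vma { unsigned long vm_flags; };

struct rmap_walk_control {
	int (*rmap_one)(struct vma *vma, void *arg);	/* per-mapping callback */
	bool (*invalid_vma)(struct vma *vma, void *arg);/* optional skip hook */
	void *arg;					/* caller state, e.g. page_referenced_arg */
};

struct page_referenced_arg { int mapcount; int referenced; };

/* Walk every mapping; stop as soon as rmap_one asks to break the loop. */
static int rmap_walk(struct vma *vmas, int nr, struct rmap_walk_control *rwc)
{
	int ret = SWAP_AGAIN;

	for (int i = 0; i < nr; i++) {
		if (rwc->invalid_vma && rwc->invalid_vma(&vmas[i], rwc->arg))
			continue;	/* e.g. wrong memcg: skip this vma */
		ret = rwc->rmap_one(&vmas[i], rwc->arg);
		if (ret != SWAP_AGAIN)
			break;		/* SWAP_SUCCESS or SWAP_FAIL ends the walk */
	}
	return ret;
}

/* Mirrors page_referenced_one: count a reference, stop once every mapping is seen. */
static int referenced_one(struct vma *vma, void *arg)
{
	struct page_referenced_arg *pra = arg;

	pra->referenced++;
	if (!--pra->mapcount)
		return SWAP_SUCCESS;	/* all mappings visited: break early */
	return SWAP_AGAIN;
}

int main(void)
{
	struct vma vmas[3] = { {0}, {0}, {0} };
	struct page_referenced_arg pra = { .mapcount = 3 };
	struct rmap_walk_control rwc = { .rmap_one = referenced_one, .arg = &pra };

	rmap_walk(vmas, 3, &rwc);
	printf("referenced = %d\n", pra.referenced);	/* prints 3 */
	return 0;
}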