author		David S. Miller <davem@davemloft.net>	2008-07-06 02:08:07 -0400
committer	David S. Miller <davem@davemloft.net>	2008-07-06 02:08:07 -0400
commit		ea2aca084ba82aaf7c148d04914ceed8758ce08a (patch)
tree		dcb3f4f849cf48deac2dd3bafd5c2cd2f0e7dc79 /mm/memory.c
parent		f3032be921cd126615ce3bfd7084e3d319f3f892 (diff)
parent		c5a78ac00c400df29645e59938700301efb371d0 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts:

	Documentation/feature-removal-schedule.txt
	drivers/net/wan/hdlc_fr.c
	drivers/net/wireless/iwlwifi/iwl-4965.c
	drivers/net/wireless/iwlwifi/iwl3945-base.c
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	67
1 file changed, 60 insertions(+), 7 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 19e0ae9beecb..d14b251a25a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -999,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                 goto no_page_table;
 
         ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-        if (!ptep)
-                goto out;
 
         pte = *ptep;
         if (!pte_present(pte))
-                goto unlock;
+                goto no_page;
         if ((flags & FOLL_WRITE) && !pte_write(pte))
                 goto unlock;
         page = vm_normal_page(vma, address, pte);
         if (unlikely(!page))
-                goto unlock;
+                goto bad_page;
 
         if (flags & FOLL_GET)
                 get_page(page);
@@ -1024,6 +1022,15 @@ unlock:
 out:
         return page;
 
+bad_page:
+        pte_unmap_unlock(ptep, ptl);
+        return ERR_PTR(-EFAULT);
+
+no_page:
+        pte_unmap_unlock(ptep, ptl);
+        if (!pte_none(pte))
+                return page;
+        /* Fall through to ZERO_PAGE handling */
 no_page_table:
         /*
          * When core dumping an enormous anonymous area that nobody
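With the new labels, follow_page() distinguishes three outcomes instead of folding every failure into NULL: an error pointer for a bad page, NULL for "nothing usable here", or a real page. A minimal caller-side sketch of what that means (illustrative only, not code from this commit):

	page = follow_page(vma, address, foll_flags);
	if (IS_ERR(page))
		return PTR_ERR(page);	/* bad_page path: -EFAULT */
	if (!page) {
		/*
		 * Empty pte, swap entry, or missing page table:
		 * fault the page in, e.g. via handle_mm_fault().
		 */
	} else {
		/* A real page; with FOLL_GET a reference is already held. */
	}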
@@ -1038,6 +1045,26 @@ no_page_table:
         return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+        /*
+         * We don't want to optimize FOLL_ANON for make_pages_present()
+         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+         * we want to get the page from the page tables to make sure
+         * that we serialize and update with any other user of that
+         * mapping.
+         */
+        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+                return 0;
+        /*
+         * And if we have a fault or a nopfn routine, it's not an
+         * anonymous region.
+         */
+        return !vma->vm_ops ||
+                (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 unsigned long start, int len, int write, int force,
                 struct page **pages, struct vm_area_struct **vmas)
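The helper centralizes the FOLL_ANON decision. Where it pays off is the no_page_table path truncated in the earlier hunk: for an anonymous area nobody has touched, follow_page() can hand back the shared zero page rather than allocating page tables. Roughly, as a paraphrase of that existing consumer (reconstructed for illustration, not part of this diff):

	if (flags & FOLL_ANON) {
		page = ZERO_PAGE(0);		/* the global zero-filled page */
		if (flags & FOLL_GET)
			get_page(page);
		BUG_ON(flags & FOLL_WRITE);	/* never taken for writes */
	}
	return page;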
@@ -1112,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 foll_flags = FOLL_TOUCH;
                 if (pages)
                         foll_flags |= FOLL_GET;
-                if (!write && !(vma->vm_flags & VM_LOCKED) &&
-                    (!vma->vm_ops || !vma->vm_ops->fault))
+                if (!write && use_zero_page(vma))
                         foll_flags |= FOLL_ANON;
 
                 do {
@@ -1159,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
                                 cond_resched();
                         }
+                        if (IS_ERR(page))
+                                return i ? i : PTR_ERR(page);
                         if (pages) {
                                 pages[i] = page;
 
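With the IS_ERR() check in place, a pinning failure surfaces to get_user_pages() callers as either a partial count or a negative errno, never as a bogus page pointer. A usage sketch against the 2.6.26-era signature shown in the hunk context above (illustrative; addr is a hypothetical user address):

	struct page *pages[1];
	int ret;

	ret = get_user_pages(current, current->mm, addr, 1,
			     1 /* write */, 0 /* force */, pages, NULL);
	if (ret < 0)
		return ret;	/* nothing pinned, e.g. -EFAULT from bad_page */
	/* ret == number of pages pinned; put_page() each entry when done. */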
@@ -1757,7 +1785,6 @@ gotten:
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (likely(pte_same(*page_table, orig_pte))) {
                 if (old_page) {
-                        page_remove_rmap(old_page, vma);
                         if (!PageAnon(old_page)) {
                                 dec_mm_counter(mm, file_rss);
                                 inc_mm_counter(mm, anon_rss);
@@ -1779,6 +1806,32 @@ gotten:
                 lru_cache_add_active(new_page);
                 page_add_new_anon_rmap(new_page, vma, address);
 
+                if (old_page) {
+                        /*
+                         * Only after switching the pte to the new page may
+                         * we remove the mapcount here. Otherwise another
+                         * process may come and find the rmap count decremented
+                         * before the pte is switched to the new page, and
+                         * "reuse" the old page writing into it while our pte
+                         * here still points into it and can be read by other
+                         * threads.
+                         *
+                         * The critical issue is to order this
+                         * page_remove_rmap with the ptep_clear_flush above.
+                         * Those stores are ordered by (if nothing else,)
+                         * the barrier present in the atomic_add_negative
+                         * in page_remove_rmap.
+                         *
+                         * Then the TLB flush in ptep_clear_flush ensures that
+                         * no process can access the old page before the
+                         * decremented mapcount is visible. And the old page
+                         * cannot be reused until after the decremented
+                         * mapcount is visible. So transitively, TLBs to
+                         * old page will be flushed before it can be reused.
+                         */
+                        page_remove_rmap(old_page, vma);
+                }
+
                 /* Free the old page.. */
                 new_page = old_page;
                 ret |= VM_FAULT_WRITE;
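The window that relocating page_remove_rmap() closes can be made concrete as an interleaving. This timeline is an illustration derived from the comment above, not text from the commit:

	/*
	 * CPU A: do_wp_page()              CPU B: fault on old_page
	 * -------------------              ------------------------
	 * page_remove_rmap(old_page)
	 *                                  mapcount now looks unshared:
	 *                                  "reuse" old_page, write into it
	 * ptep_clear_flush(), set new pte
	 *
	 * Until A's flush, A's stale pte and TLB entries still let other
	 * threads read old_page, now carrying B's writes. Doing
	 * page_remove_rmap() only after ptep_clear_flush(), as merged
	 * above, guarantees the TLB flush completes before the mapcount
	 * decrement becomes visible, so old_page cannot be reused while
	 * any stale translation to it remains.
	 */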