aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2014-04-04 04:28:22 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-04 19:16:55 -0400
commitcda540ace6a194850e23c79955cc2e46fd91c19a (patch)
tree886bd309085e4cc16f671186cbf34eca6b415aa1
parentd15e03104eb9a4f8e244ab6ed3ca5a107e46db13 (diff)
mm: get_user_pages(write,force) refuse to COW in shared areas
get_user_pages(write=1, force=1) has always had odd behaviour on write-protected shared mappings: although it demands FMODE_WRITE-access to the underlying object (do_mmap_pgoff sets neither VM_SHARED nor VM_MAYWRITE without that), it ends up with do_wp_page substituting private anonymous Copied-On-Write pages for the shared file pages in the area. That was long ago intentional, as a safety measure to prevent ptrace setting a breakpoint (or POKETEXT or POKEDATA) from inadvertently corrupting the underlying executable. Yet exec and dynamic loaders open the file read-only, and use MAP_PRIVATE rather than MAP_SHARED. The traditional odd behaviour still causes surprises and bugs in mm, and is probably not what any caller wants - even the comment on the flag says "You do not want this" (although it's undoubtedly necessary for overriding userspace protections in some contexts, and good when !write). Let's stop doing that. But it would be dangerous to remove the long-standing safety at this stage, so just make get_user_pages(write,force) fail with EFAULT when applied to a write-protected shared area. Infiniband may in future want to force write through to underlying object: we can add another FOLL_flag later to enable that if required. Odd though the old behaviour was, there is no doubt that we may turn out to break userspace with this change, and have to revert it quickly. Issue a WARN_ON_ONCE to help debug the changed case (easily triggered by userspace, so only once to prevent spamming the logs); and delay a few associated cleanups until this change is proved. get_user_pages callers who might see trouble from this change: ptrace poking, or writing to /proc/<pid>/mem drivers/infiniband/ drivers/media/v4l2-core/ drivers/gpu/drm/exynos/exynos_drm_gem.c drivers/staging/tidspbridge/core/tiomap3430.c if they ever apply get_user_pages to write-protected shared mappings of an object which was opened for writing. I went to apply the same change to mm/nommu.c, but retreated. 
NOMMU has no place for COW, and its VM_flags conventions are not the same: I'd be more likely to screw up NOMMU than make an improvement there. Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Hugh Dickins <hughd@google.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/memory.c66
1 file changed, 45 insertions, 21 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 90cea22001ef..82c1e4cf00d1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1705,15 +1705,6 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1705 1705
1706 VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); 1706 VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
1707 1707
1708 /*
1709 * Require read or write permissions.
1710 * If FOLL_FORCE is set, we only require the "MAY" flags.
1711 */
1712 vm_flags = (gup_flags & FOLL_WRITE) ?
1713 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
1714 vm_flags &= (gup_flags & FOLL_FORCE) ?
1715 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
1716
1717 /* 1708 /*
1718 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault 1709 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
1719 * would be called on PROT_NONE ranges. We must never invoke 1710 * would be called on PROT_NONE ranges. We must never invoke
@@ -1741,7 +1732,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1741 1732
1742 /* user gate pages are read-only */ 1733 /* user gate pages are read-only */
1743 if (gup_flags & FOLL_WRITE) 1734 if (gup_flags & FOLL_WRITE)
1744 return i ? : -EFAULT; 1735 goto efault;
1745 if (pg > TASK_SIZE) 1736 if (pg > TASK_SIZE)
1746 pgd = pgd_offset_k(pg); 1737 pgd = pgd_offset_k(pg);
1747 else 1738 else
@@ -1751,12 +1742,12 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1751 BUG_ON(pud_none(*pud)); 1742 BUG_ON(pud_none(*pud));
1752 pmd = pmd_offset(pud, pg); 1743 pmd = pmd_offset(pud, pg);
1753 if (pmd_none(*pmd)) 1744 if (pmd_none(*pmd))
1754 return i ? : -EFAULT; 1745 goto efault;
1755 VM_BUG_ON(pmd_trans_huge(*pmd)); 1746 VM_BUG_ON(pmd_trans_huge(*pmd));
1756 pte = pte_offset_map(pmd, pg); 1747 pte = pte_offset_map(pmd, pg);
1757 if (pte_none(*pte)) { 1748 if (pte_none(*pte)) {
1758 pte_unmap(pte); 1749 pte_unmap(pte);
1759 return i ? : -EFAULT; 1750 goto efault;
1760 } 1751 }
1761 vma = get_gate_vma(mm); 1752 vma = get_gate_vma(mm);
1762 if (pages) { 1753 if (pages) {
@@ -1769,7 +1760,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1769 page = pte_page(*pte); 1760 page = pte_page(*pte);
1770 else { 1761 else {
1771 pte_unmap(pte); 1762 pte_unmap(pte);
1772 return i ? : -EFAULT; 1763 goto efault;
1773 } 1764 }
1774 } 1765 }
1775 pages[i] = page; 1766 pages[i] = page;
@@ -1780,10 +1771,42 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1780 goto next_page; 1771 goto next_page;
1781 } 1772 }
1782 1773
1783 if (!vma || 1774 if (!vma)
1784 (vma->vm_flags & (VM_IO | VM_PFNMAP)) || 1775 goto efault;
1785 !(vm_flags & vma->vm_flags)) 1776 vm_flags = vma->vm_flags;
1786 return i ? : -EFAULT; 1777 if (vm_flags & (VM_IO | VM_PFNMAP))
1778 goto efault;
1779
1780 if (gup_flags & FOLL_WRITE) {
1781 if (!(vm_flags & VM_WRITE)) {
1782 if (!(gup_flags & FOLL_FORCE))
1783 goto efault;
1784 /*
1785 * We used to let the write,force case do COW
1786 * in a VM_MAYWRITE VM_SHARED !VM_WRITE vma, so
1787 * ptrace could set a breakpoint in a read-only
1788 * mapping of an executable, without corrupting
1789 * the file (yet only when that file had been
1790 * opened for writing!). Anon pages in shared
1791 * mappings are surprising: now just reject it.
1792 */
1793 if (!is_cow_mapping(vm_flags)) {
1794 WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
1795 goto efault;
1796 }
1797 }
1798 } else {
1799 if (!(vm_flags & VM_READ)) {
1800 if (!(gup_flags & FOLL_FORCE))
1801 goto efault;
1802 /*
1803 * Is there actually any vma we can reach here
1804 * which does not have VM_MAYREAD set?
1805 */
1806 if (!(vm_flags & VM_MAYREAD))
1807 goto efault;
1808 }
1809 }
1787 1810
1788 if (is_vm_hugetlb_page(vma)) { 1811 if (is_vm_hugetlb_page(vma)) {
1789 i = follow_hugetlb_page(mm, vma, pages, vmas, 1812 i = follow_hugetlb_page(mm, vma, pages, vmas,
@@ -1837,7 +1860,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1837 return -EFAULT; 1860 return -EFAULT;
1838 } 1861 }
1839 if (ret & VM_FAULT_SIGBUS) 1862 if (ret & VM_FAULT_SIGBUS)
1840 return i ? i : -EFAULT; 1863 goto efault;
1841 BUG(); 1864 BUG();
1842 } 1865 }
1843 1866
@@ -1895,6 +1918,8 @@ next_page:
1895 } while (nr_pages && start < vma->vm_end); 1918 } while (nr_pages && start < vma->vm_end);
1896 } while (nr_pages); 1919 } while (nr_pages);
1897 return i; 1920 return i;
1921efault:
1922 return i ? : -EFAULT;
1898} 1923}
1899EXPORT_SYMBOL(__get_user_pages); 1924EXPORT_SYMBOL(__get_user_pages);
1900 1925
@@ -1962,9 +1987,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
1962 * @start: starting user address 1987 * @start: starting user address
1963 * @nr_pages: number of pages from start to pin 1988 * @nr_pages: number of pages from start to pin
1964 * @write: whether pages will be written to by the caller 1989 * @write: whether pages will be written to by the caller
1965 * @force: whether to force write access even if user mapping is 1990 * @force: whether to force access even when user mapping is currently
1966 * readonly. This will result in the page being COWed even 1991 * protected (but never forces write access to shared mapping).
1967 * in MAP_SHARED mappings. You do not want this.
1968 * @pages: array that receives pointers to the pages pinned. 1992 * @pages: array that receives pointers to the pages pinned.
1969 * Should be at least nr_pages long. Or NULL, if caller 1993 * Should be at least nr_pages long. Or NULL, if caller
1970 * only intends to ensure the pages are faulted in. 1994 * only intends to ensure the pages are faulted in.