author		Mike Kravetz <mike.kravetz@oracle.com>		2016-11-10 13:46:32 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-11-11 11:12:37 -0500
commit		96b96a96ddee4ba08ce4aeb8a558a3271fd4a7a7
tree		d3ba842688a35437e289411fe7bfe1969474efc4
parent		d006c71f8ad2663dd47f81bf96bf655eeed428e2
mm/hugetlb: fix huge page reservation leak in private mapping error paths
Error paths in hugetlb_cow() and hugetlb_no_page() may free a newly
allocated huge page.

If a reservation was associated with the huge page, alloc_huge_page()
consumed the reservation while allocating.  When the newly allocated page
is freed in free_huge_page(), it will increment the global reservation
count.  However, the reservation entry in the reserve map will remain.
This is not an issue for shared mappings as the entry in the reserve map
indicates a reservation exists.  But, an entry in a private mapping
reserve map indicates the reservation was consumed and no longer exists.
This results in an inconsistency between the reserve map and the global
reservation count.  This "leaks" a reserved huge page.

Create a new routine restore_reserve_on_error() to restore the reserve
entry in these specific error paths.  This routine makes use of a new
function vma_add_reservation() which will add a reserve entry for a
specific address/page.

In general, these error paths were rarely (if ever) taken on most
architectures.  However, powerpc contained arch specific code that
resulted in an extra fault and execution of these error paths on all
private mappings.

Fixes: 67961f9db8c4 ("mm/hugetlb: fix huge page reserve accounting for private mappings")
Link: http://lkml.kernel.org/r/1476933077-23091-2-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
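The accounting mismatch is easiest to see as a pair of counter updates
that no longer agree.  The following minimal userspace sketch is not
kernel code: resv_huge_pages, map_entry_present, page_private and the
stub functions are simplified stand-ins for the kernel state the commit
message describes, modeling the private-mapping case before this fix.

#include <stdbool.h>
#include <stdio.h>

static long resv_huge_pages = 1;   /* global reservation count */
static bool map_entry_present;     /* private map: entry == consumed */
static bool page_private;          /* PagePrivate: resv was consumed */

static void alloc_huge_page_stub(void)
{
	resv_huge_pages--;         /* allocation consumes the reservation */
	map_entry_present = true;  /* reserve map records the consumption */
	page_private = true;       /* remembered in the page itself */
}

static void free_huge_page_stub(void)
{
	if (page_private)
		resv_huge_pages++; /* global count is given back ... */
	/* ... but the reserve map cannot be touched here, so the stale
	 * entry still claims the reservation was consumed. */
}

int main(void)
{
	alloc_huge_page_stub();
	free_huge_page_stub();     /* error path frees the new page */
	printf("global count: %ld reserved; map says consumed: %d\n",
	       resv_huge_pages, map_entry_present);
	/* Prints "1" and "1": one page held in reserve that the mapping
	 * can never claim again -- the leaked reservation that
	 * restore_reserve_on_error() repairs. */
	return 0;
}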
 mm/hugetlb.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+), 0 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ec49d9ef1eef..418bf01a50ed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls. It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed. It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
 	VMA_NEEDS_RESV,
 	VMA_COMMIT_RESV,
 	VMA_END_RESV,
+	VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
 				struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
 		region_abort(resv, idx, idx + 1);
 		ret = 0;
 		break;
+	case VMA_ADD_RESV:
+		if (vma->vm_flags & VM_MAYSHARE)
+			ret = region_add(resv, idx, idx + 1);
+		else {
+			region_abort(resv, idx, idx + 1);
+			ret = region_del(resv, idx, idx + 1);
+		}
+		break;
 	default:
 		BUG();
 	}
@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
 	(void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths. In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed. If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page. When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map. Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address,
+			struct page *page)
+{
+	if (unlikely(PagePrivate(page))) {
+		long rc = vma_needs_reservation(h, vma, address);
+
+		if (unlikely(rc < 0)) {
+			/*
+			 * Rare out of memory condition in reserve map
+			 * manipulation. Clear PagePrivate so that
+			 * global reserve count will not be incremented
+			 * by free_huge_page. This will make it appear
+			 * as though the reservation for this page was
+			 * consumed. This may prevent the task from
+			 * faulting in the page at a later time. This
+			 * is better than inconsistent global huge page
+			 * accounting of reserve counts.
+			 */
+			ClearPagePrivate(page);
+		} else if (rc) {
+			rc = vma_add_reservation(h, vma, address);
+			if (unlikely(rc < 0))
+				/*
+				 * See above comment about rare out of
+				 * memory condition.
+				 */
+				ClearPagePrivate(page);
+		} else
+			vma_end_reservation(h, vma, address);
+	}
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
@@ -3498,6 +3562,7 @@ retry_avoidcopy:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+	restore_reserve_on_error(h, vma, address, new_page);
 	put_page(new_page);
 out_release_old:
 	put_page(old_page);
@@ -3680,6 +3745,7 @@ backout:
 	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
+	restore_reserve_on_error(h, vma, address, page);
 	put_page(page);
 	goto out;
 }
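For context, the calling pattern the two hunks above establish looks like
the fragment below.  This is a hedged sketch rather than code from the
patch: fault_path_sketch() and do_work_that_may_fail() are hypothetical
placeholders for hugetlb_cow()/hugetlb_no_page() and the operations that
can fail in them after the page is allocated; the rest assumes the kernel
context of mm/hugetlb.c.

/* Illustrative fragment only, not part of the patch. */
static int fault_path_sketch(struct hstate *h, struct vm_area_struct *vma,
			     unsigned long address)
{
	struct page *page;
	int ret;

	page = alloc_huge_page(vma, address, 0); /* may consume a reservation */
	if (IS_ERR(page))
		return PTR_ERR(page);

	ret = do_work_that_may_fail();	/* hypothetical placeholder */
	if (ret) {
		/*
		 * Before dropping the page, re-sync the reserve map with
		 * the global count adjustment free_huge_page() will make.
		 */
		restore_reserve_on_error(h, vma, address, page);
		put_page(page);
	}
	return ret;
}

The key design point is ordering: restore_reserve_on_error() must run
before the final put_page(), while the caller still knows which vma and
address the page was allocated for, because free_huge_page() itself has
no access to the reserve map.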