author    Andrea Arcangeli <aarcange@redhat.com>  2011-01-13 18:47:08 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:45 -0500
commit    94fcc585fb85ad7b059c70872489b50044d401f3 (patch)
tree      67efce3803149bec77df1f50a06f384deae02ba6 /mm
parent    bc835011afbea3957217ee716093d791fb2fe44f (diff)
thp: avoid breaking huge pmd invariants in case of vma_adjust failures
A huge pmd can only be mapped if the corresponding 2M virtual range is fully contained in the vma. At times the VM calls split_vma twice: if the first split_vma succeeds and the second fails, the first split_vma remains in effect and is not rolled back. For split_vma or vma_adjust to fail an allocation failure is needed, so this is a very unlikely event (the out-of-memory killer would normally fire before any allocation failure becomes visible to the kernel and userland, and if an out-of-memory condition happens it is unlikely to happen exactly here). Nevertheless it is safer to ensure that no huge pmd can be left around if the vma is adjusted in a way that can no longer fit hugepages at the new vm_start/vm_end addresses.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
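To make the containment invariant concrete, the following is a minimal userspace-style sketch, not part of the patch: boundary_needs_split, vma_range and the 2M constants are illustrative stand-ins (assuming x86-style 2M huge pmds) for the check that the new __vma_adjust_trans_huge applies to each moved boundary in the diff below.

/* Minimal sketch of the patch's containment check; names are illustrative. */
#include <stdbool.h>

#define HPAGE_PMD_SIZE	(2UL << 20)		/* 2M, as on x86 with 4K base pages */
#define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))

struct vma_range { unsigned long vm_start, vm_end; };

/*
 * A huge pmd may only back a 2M range that lies fully inside the vma.
 * If a vma boundary moves to a non-2M-aligned address whose surrounding
 * 2M range used to be inside the vma, the huge pmd covering that range
 * must be split before the adjustment takes effect.
 */
static bool boundary_needs_split(const struct vma_range *vma, unsigned long addr)
{
	unsigned long haddr = addr & HPAGE_PMD_MASK;

	return (addr & ~HPAGE_PMD_MASK) &&		/* boundary not 2M aligned */
	       haddr >= vma->vm_start &&		/* 2M range starts inside the vma */
	       haddr + HPAGE_PMD_SIZE <= vma->vm_end;	/* 2M range ends inside the vma */
}

When this test is true for the new vm_start, the new vm_end, or the adjusted vm_next->vm_start, the patch calls split_huge_page_address() on that address while mmap_sem is held for writing, so no huge pmd can be left straddling the new boundary.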
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	80
-rw-r--r--	mm/mmap.c	2
2 files changed, 80 insertions(+), 2 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 30c3cec82023..b6facc35e893 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1075,8 +1075,16 @@ pmd_t *page_check_address_pmd(struct page *page,
 		goto out;
 	if (pmd_page(*pmd) != page)
 		goto out;
-	VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
-		  pmd_trans_splitting(*pmd));
+	/*
+	 * split_vma() may create temporary aliased mappings. There is
+	 * no risk as long as all huge pmd are found and have their
+	 * splitting bit set before __split_huge_page_refcount
+	 * runs. Finding the same huge pmd more than once during the
+	 * same rmap walk is not a problem.
+	 */
+	if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
+	    pmd_trans_splitting(*pmd))
+		goto out;
 	if (pmd_trans_huge(*pmd)) {
 		VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
 			  !pmd_trans_splitting(*pmd));
@@ -2196,3 +2204,71 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
 	put_page(page);
 	BUG_ON(pmd_trans_huge(*pmd));
 }
+
+static void split_huge_page_address(struct mm_struct *mm,
+				    unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	VM_BUG_ON(!(address & ~HPAGE_PMD_MASK));
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return;
+	/*
+	 * Caller holds the mmap_sem write mode, so a huge pmd cannot
+	 * materialize from under us.
+	 */
+	split_huge_page_pmd(mm, pmd);
+}
+
+void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+			     unsigned long start,
+			     unsigned long end,
+			     long adjust_next)
+{
+	/*
+	 * If the new start address isn't hpage aligned and it could
+	 * previously contain an hugepage: check if we need to split
+	 * an huge pmd.
+	 */
+	if (start & ~HPAGE_PMD_MASK &&
+	    (start & HPAGE_PMD_MASK) >= vma->vm_start &&
+	    (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+		split_huge_page_address(vma->vm_mm, start);
+
+	/*
+	 * If the new end address isn't hpage aligned and it could
+	 * previously contain an hugepage: check if we need to split
+	 * an huge pmd.
+	 */
+	if (end & ~HPAGE_PMD_MASK &&
+	    (end & HPAGE_PMD_MASK) >= vma->vm_start &&
+	    (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+		split_huge_page_address(vma->vm_mm, end);
+
+	/*
+	 * If we're also updating the vma->vm_next->vm_start, if the new
+	 * vm_next->vm_start isn't page aligned and it could previously
+	 * contain an hugepage: check if we need to split an huge pmd.
+	 */
+	if (adjust_next > 0) {
+		struct vm_area_struct *next = vma->vm_next;
+		unsigned long nstart = next->vm_start;
+		nstart += adjust_next << PAGE_SHIFT;
+		if (nstart & ~HPAGE_PMD_MASK &&
+		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
+		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
+			split_huge_page_address(next->vm_mm, nstart);
+	}
+}
diff --git a/mm/mmap.c b/mm/mmap.c
index 753f44d17047..73cc648873d6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -589,6 +589,8 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
+	vma_adjust_trans_huge(vma, start, end, adjust_next);
+
 	/*
 	 * When changing only vma->vm_end, we don't really need anon_vma
 	 * lock. This is a fairly rare case by itself, but the anon_vma