path: root/mm/mlock.c
author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>  2016-01-15 19:54:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-01-15 20:56:32 -0500
commit     e90309c9f7722db4ff5bce3b9e6e04d1460f2553 (patch)
tree       79a9d09bdfcc8c653877f82b84de4f48e0472ab1 /mm/mlock.c
parent     a46e63764eb6d0252ab4e96f96ad447594673274 (diff)
thp: allow mlocked THP again
Before the THP refcounting rework, a THP was not allowed to cross a VMA boundary, so if we had a THP and split it, PG_mlocked could be safely transferred to the small pages. With the new THP refcounting and a naive approach to mlocking, we can end up with this scenario:

1. we have an mlocked THP, which belongs to one VM_LOCKED VMA;
2. the process does munlock() on *part* of the THP:
   - the VMA is split into two, one of them VM_LOCKED;
   - the huge PMD is split into a PTE table;
   - the THP is still mlocked;
3. split_huge_page() transfers PG_mlocked to *all* small pages, regardless of whether they belong to any VM_LOCKED VMA.

We probably could munlock() all small pages on split_huge_page(), but I think we already have an accounting issue at step 2.

Instead of forbidding mlocked pages altogether, we just avoid mlocking PTE-mapped THPs and munlock THPs on split_huge_pmd(). This means PTE-mapped THPs will be on the normal LRU lists and will be split under memory pressure by vmscan. After the split, vmscan will detect the unevictable small pages and mlock them. With this approach we shouldn't hit the situation described above.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
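The scenario above can be driven from userspace with nothing more than a partial munlock(). The sketch below is only an illustration and is not part of this patch; it assumes an x86_64 host with 2M transparent huge pages enabled and a sufficient RLIMIT_MEMLOCK, and the HPAGE_SIZE constant is an assumption, not something taken from the kernel headers.

/*
 * Hypothetical userspace illustration of the problematic sequence:
 * fault in a THP inside a VM_LOCKED VMA, then munlock() only half of
 * it.  Before this patch, that could leave a PTE-mapped, still-mlocked
 * THP behind.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define HPAGE_SIZE (2UL << 20)	/* assumed 2M THP size on x86_64 */

int main(void)
{
	/* Over-allocate so we can pick a 2M-aligned start address. */
	char *raw = mmap(NULL, 2 * HPAGE_SIZE, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	char *p = (char *)(((unsigned long)raw + HPAGE_SIZE - 1) &
			   ~(HPAGE_SIZE - 1));

	madvise(p, HPAGE_SIZE, MADV_HUGEPAGE);	/* ask for a THP */
	memset(p, 0, HPAGE_SIZE);		/* fault it in, ideally as one THP */

	/* Step 1: an mlocked THP in a single VM_LOCKED VMA. */
	if (mlock(p, HPAGE_SIZE)) {
		perror("mlock");		/* needs RLIMIT_MEMLOCK >= 2M */
		return 1;
	}

	/*
	 * Step 2: munlock() part of the THP.  The VMA is split in two
	 * (one still VM_LOCKED), the huge PMD is split into a PTE
	 * table, and the compound page itself stays mlocked.
	 */
	munlock(p, HPAGE_SIZE / 2);

	return 0;
}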
Diffstat (limited to 'mm/mlock.c')
-rw-r--r--  mm/mlock.c  68
1 file changed, 47 insertions(+), 21 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index c6b139ad356a..9197b6721a1e 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -82,6 +82,9 @@ void mlock_vma_page(struct page *page)
 	/* Serialize with page migration */
 	BUG_ON(!PageLocked(page));
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
+
 	if (!TestSetPageMlocked(page)) {
 		mod_zone_page_state(page_zone(page), NR_MLOCK,
 				    hpage_nr_pages(page));
@@ -178,6 +181,8 @@ unsigned int munlock_vma_page(struct page *page)
 	/* For try_to_munlock() and to serialize with page migration */
 	BUG_ON(!PageLocked(page));
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
 	/*
 	 * Serialize with any parallel __split_huge_page_refcount() which
 	 * might otherwise copy PageMlocked to part of the tail pages before
@@ -388,6 +393,13 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
 		if (!page || page_zone_id(page) != zoneid)
 			break;
 
+		/*
+		 * Do not use pagevec for PTE-mapped THP,
+		 * munlock_vma_pages_range() will handle them.
+		 */
+		if (PageTransCompound(page))
+			break;
+
 		get_page(page);
 		/*
 		 * Increase the address that will be returned *before* the
@@ -443,29 +455,43 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
 				&page_mask);
 
-		if (page && !IS_ERR(page) && !PageTransCompound(page)) {
-			/*
-			 * Non-huge pages are handled in batches via
-			 * pagevec. The pin from follow_page_mask()
-			 * prevents them from collapsing by THP.
-			 */
-			pagevec_add(&pvec, page);
-			zone = page_zone(page);
-			zoneid = page_zone_id(page);
+		if (page && !IS_ERR(page)) {
+			if (PageTransTail(page)) {
+				VM_BUG_ON_PAGE(PageMlocked(page), page);
+				put_page(page); /* follow_page_mask() */
+			} else if (PageTransHuge(page)) {
+				lock_page(page);
+				/*
+				 * Any THP page found by follow_page_mask() may
+				 * have gotten split before reaching
+				 * munlock_vma_page(), so we need to recompute
+				 * the page_mask here.
+				 */
+				page_mask = munlock_vma_page(page);
+				unlock_page(page);
+				put_page(page); /* follow_page_mask() */
+			} else {
+				/*
+				 * Non-huge pages are handled in batches via
+				 * pagevec. The pin from follow_page_mask()
+				 * prevents them from collapsing by THP.
+				 */
+				pagevec_add(&pvec, page);
+				zone = page_zone(page);
+				zoneid = page_zone_id(page);
 
-			/*
-			 * Try to fill the rest of pagevec using fast
-			 * pte walk. This will also update start to
-			 * the next page to process. Then munlock the
-			 * pagevec.
-			 */
-			start = __munlock_pagevec_fill(&pvec, vma,
-					zoneid, start, end);
-			__munlock_pagevec(&pvec, zone);
-			goto next;
+				/*
+				 * Try to fill the rest of pagevec using fast
+				 * pte walk. This will also update start to
+				 * the next page to process. Then munlock the
+				 * pagevec.
+				 */
+				start = __munlock_pagevec_fill(&pvec, vma,
+						zoneid, start, end);
+				__munlock_pagevec(&pvec, zone);
+				goto next;
+			}
 		}
-		/* It's a bug to munlock in the middle of a THP page */
-		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
 		page_increm = 1 + page_mask;
 		start += page_increm * PAGE_SIZE;
 next: