Diffstat (limited to 'mm/huge_memory.c')
 mm/huge_memory.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 99 insertions(+), 9 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 827d9c813051..d7ee1691fd21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -19,6 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -690,7 +691,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -848,7 +849,8 @@ out:
 	 * run pte_offset_map on the pmd, if an huge pmd could
 	 * materialize from under us from a different thread.
 	 */
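+	/* Only allocate a new page table if none is present yet */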
-	if (unlikely(__pte_alloc(mm, vma, pmd, address)))
+	if (unlikely(pmd_none(*pmd)) &&
+	    unlikely(__pte_alloc(mm, vma, pmd, address)))
 		return VM_FAULT_OOM;
 	/* if an huge pmd materialized from under us just retry later */
 	if (unlikely(pmd_trans_huge(*pmd)))
@@ -1287,6 +1289,81 @@ out:
 	return page;
 }
 
+/* NUMA hinting page fault entry point for trans huge pmds */
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+	struct page *page;
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int target_nid;
+	int current_nid = -1;
+	bool migrated = false;	/* read at out_unlock even if migration is never attempted */
+	bool page_locked = false;
+
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
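+	/*
+	 * Pin the page while it is examined; current_nid records the
+	 * node the page currently resides on, for fault accounting.
+	 */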
+	page = pmd_page(pmd);
+	get_page(page);
+	current_nid = page_to_nid(page);
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
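+	/*
+	 * Ask the memory policy which node this page should be on;
+	 * -1 means it is already placed on a suitable node.
+	 */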
+	target_nid = mpol_misplaced(page, vma, haddr);
+	if (target_nid == -1) {
+		put_page(page);
+		goto clear_pmdnuma;
+	}
+
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
+
+	/* Confirm the pmd did not change while the lock was dropped */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
+		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
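+		/*
+		 * Migration failed; if the pmd is unchanged, fall
+		 * through and clear the NUMA bit so the fault is not
+		 * taken again immediately.
+		 */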
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
+
+clear_pmdnuma:
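+	/* Restore a regular, non-faulting pmd now that the hint is handled */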
+	pmd = pmd_mknonnuma(pmd);
+	set_pmd_at(mm, haddr, pmdp, pmd);
+	VM_BUG_ON(pmd_numa(*pmdp));
+	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
+
+out_unlock:
+	spin_unlock(&mm->page_table_lock);
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
@@ -1375,7 +1452,7 @@ out:
 }
 
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, pgprot_t newprot)
+		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
@@ -1383,7 +1460,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
 		pmd_t entry;
 		entry = pmdp_get_and_clear(mm, addr, pmd);
-		entry = pmd_modify(entry, newprot);
+		if (!prot_numa)
+			entry = pmd_modify(entry, newprot);
+		else {
+			struct page *page = pmd_page(*pmd);
+
+			/* only mark non-shared pages not already NUMA */
+			if (page_mapcount(page) == 1 &&
+			    !pmd_numa(*pmd)) {
+				entry = pmd_mknuma(entry);
+			}
+		}
 		BUG_ON(pmd_write(entry));
 		set_pmd_at(mm, addr, pmd, entry);
 		spin_unlock(&vma->vm_mm->page_table_lock);
@@ -1474,7 +1561,7 @@ static int __split_huge_page_splitting(struct page *page,
 		 * We can't temporarily set the pmd to null in order
 		 * to split it, the pmd must remain marked huge at all
 		 * times or the VM won't take the pmd_trans_huge paths
-		 * and it won't wait on the anon_vma->root->mutex to
+		 * and it won't wait on the anon_vma->root->rwsem to
 		 * serialize against split_huge_page*.
 		 */
 		pmdp_splitting_flush(vma, address, pmd);
@@ -1565,6 +1652,7 @@ static void __split_huge_page_refcount(struct page *page)
 		page_tail->mapping = page->mapping;
 
 		page_tail->index = page->index + i;
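+		/* Tail pages inherit the head page's last NUMA fault node */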
+		page_xchg_last_nid(page_tail, page_last_nid(page));
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
@@ -1632,6 +1720,8 @@ static int __split_huge_page_map(struct page *page,
 				BUG_ON(page_mapcount(page) != 1);
 			if (!pmd_young(*pmd))
 				entry = pte_mkold(entry);
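+			/* Preserve the NUMA hinting bit on each pte of the split pmd */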
+			if (pmd_numa(*pmd))
+				entry = pte_mknuma(entry);
 			pte = pte_offset_map(&_pmd, haddr);
 			BUG_ON(!pte_none(*pte));
 			set_pte_at(mm, haddr, pte, entry);
@@ -1674,7 +1764,7 @@ static int __split_huge_page_map(struct page *page,
 	return ret;
 }
 
-/* must be called with anon_vma->root->mutex hold */
+/* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
@@ -1729,7 +1819,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
 	BUG_ON(!PageAnon(page));
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		goto out;
 	ret = 0;
@@ -1742,7 +1832,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(PageCompound(page));
 out_unlock:
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 out:
 	return ret;
 }
@@ -2234,7 +2324,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (pmd_trans_huge(*pmd))
 		goto out;
 
-	anon_vma_lock(vma->anon_vma);
+	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
 	ptl = pte_lockptr(mm, pmd);