Diffstat (limited to 'mm/huge_memory.c')

 mm/huge_memory.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 99 insertions(+), 9 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 827d9c813051..d7ee1691fd21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -19,6 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -690,7 +691,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
         if (likely(vma->vm_flags & VM_WRITE))
                 pmd = pmd_mkwrite(pmd);
@@ -848,7 +849,8 @@ out:
          * run pte_offset_map on the pmd, if a huge pmd could
          * materialize from under us from a different thread.
          */
-        if (unlikely(__pte_alloc(mm, vma, pmd, address)))
+        if (unlikely(pmd_none(*pmd)) &&
+            unlikely(__pte_alloc(mm, vma, pmd, address)))
                 return VM_FAULT_OOM;
         /* if a huge pmd materialized from under us just retry later */
         if (unlikely(pmd_trans_huge(*pmd)))
@@ -1287,6 +1289,81 @@ out:
         return page;
 }
 
+/* NUMA hinting page fault entry point for trans huge pmds */
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+                                unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+        struct page *page;
+        unsigned long haddr = addr & HPAGE_PMD_MASK;
+        int target_nid;
+        int current_nid = -1;
+        bool migrated;
+        bool page_locked = false;
+
+        spin_lock(&mm->page_table_lock);
+        if (unlikely(!pmd_same(pmd, *pmdp)))
+                goto out_unlock;
+
+        page = pmd_page(pmd);
+        get_page(page);
+        current_nid = page_to_nid(page);
+        count_vm_numa_event(NUMA_HINT_FAULTS);
+        if (current_nid == numa_node_id())
+                count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
+        target_nid = mpol_misplaced(page, vma, haddr);
+        if (target_nid == -1) {
+                put_page(page);
+                goto clear_pmdnuma;
+        }
+
+        /* Acquire the page lock to serialise THP migrations */
+        spin_unlock(&mm->page_table_lock);
+        lock_page(page);
+        page_locked = true;
+
+        /* Confirm the pmd did not change while page_table_lock was released */
+        spin_lock(&mm->page_table_lock);
+        if (unlikely(!pmd_same(pmd, *pmdp))) {
+                unlock_page(page);
+                put_page(page);
+                goto out_unlock;
+        }
+        spin_unlock(&mm->page_table_lock);
+
+        /* Migrate the THP to the requested node */
+        migrated = migrate_misplaced_transhuge_page(mm, vma,
+                                pmdp, pmd, addr,
+                                page, target_nid);
+        if (migrated)
+                current_nid = target_nid;
+        else {
+                spin_lock(&mm->page_table_lock);
+                if (unlikely(!pmd_same(pmd, *pmdp))) {
+                        unlock_page(page);
+                        goto out_unlock;
+                }
+                goto clear_pmdnuma;
+        }
+
+        task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+        return 0;
+
+clear_pmdnuma:
+        pmd = pmd_mknonnuma(pmd);
+        set_pmd_at(mm, haddr, pmdp, pmd);
+        VM_BUG_ON(pmd_numa(*pmdp));
+        update_mmu_cache_pmd(vma, addr, pmdp);
+        if (page_locked)
+                unlock_page(page);
+
+out_unlock:
+        spin_unlock(&mm->page_table_lock);
+        if (current_nid != -1)
+                task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+        return 0;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                  pmd_t *pmd, unsigned long addr)
 {
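For context, do_huge_pmd_numa_page() is reached from the generic fault path when a huge pmd carries the NUMA hinting bit. A minimal sketch of that dispatch, assuming the v3.8-era shape of handle_mm_fault() in mm/memory.c (the surrounding write/CoW checks are elided and may differ):

/*
 * Sketch, not verbatim kernel code: orig_pmd is the pmd value read
 * before taking locks.  A pmd made non-present by the prot_numa path
 * traps here on the next access and is handed to the THP-aware
 * handler added in this patch.
 */
if (pmd_trans_huge(orig_pmd)) {
        if (pmd_numa(orig_pmd))
                return do_huge_pmd_numa_page(mm, vma, address,
                                             orig_pmd, pmd);
        /* otherwise fall through to the usual THP write/CoW handling */
}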
@@ -1375,7 +1452,7 @@ out:
 }
 
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-                unsigned long addr, pgprot_t newprot)
+                unsigned long addr, pgprot_t newprot, int prot_numa)
 {
         struct mm_struct *mm = vma->vm_mm;
         int ret = 0;
@@ -1383,7 +1460,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
         if (__pmd_trans_huge_lock(pmd, vma) == 1) {
                 pmd_t entry;
                 entry = pmdp_get_and_clear(mm, addr, pmd);
-                entry = pmd_modify(entry, newprot);
+                if (!prot_numa)
+                        entry = pmd_modify(entry, newprot);
+                else {
+                        struct page *page = pmd_page(*pmd);
+
+                        /* only check non-shared pages */
+                        if (page_mapcount(page) == 1 &&
+                            !pmd_numa(*pmd)) {
+                                entry = pmd_mknuma(entry);
+                        }
+                }
                 BUG_ON(pmd_write(entry));
                 set_pmd_at(mm, addr, pmd, entry);
                 spin_unlock(&vma->vm_mm->page_table_lock);
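The prot_numa path is driven by the NUMA placement scanner rather than by mprotect(). A hedged sketch of the caller side, modelled on the change_prot_numa() helper this series introduces in mm/mempolicy.c; the exact protection argument, return type, and the NUMA_PTE_UPDATES accounting are assumptions from the same series:

/*
 * Sketch: periodically mark a VMA range pmd_numa/pte_numa so the next
 * access takes a hinting fault.  change_protection() grows a prot_numa
 * argument in this series; with prot_numa == 1, change_huge_pmd()
 * takes the branch above instead of applying newprot.
 */
unsigned long change_prot_numa(struct vm_area_struct *vma,
                               unsigned long addr, unsigned long end)
{
        int nr_updated;

        nr_updated = change_protection(vma, addr, end,
                                       vma->vm_page_prot, 0, 1);
        if (nr_updated)
                count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);

        return nr_updated;
}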
@@ -1474,7 +1561,7 @@ static int __split_huge_page_splitting(struct page *page,
                  * We can't temporarily set the pmd to null in order
                  * to split it, the pmd must remain marked huge at all
                  * times or the VM won't take the pmd_trans_huge paths
-                 * and it won't wait on the anon_vma->root->mutex to
+                 * and it won't wait on the anon_vma->root->rwsem to
                  * serialize against split_huge_page*.
                  */
                 pmdp_splitting_flush(vma, address, pmd);
@@ -1565,6 +1652,7 @@ static void __split_huge_page_refcount(struct page *page)
                 page_tail->mapping = page->mapping;
 
                 page_tail->index = page->index + i;
+                page_xchg_last_nid(page_tail, page_last_nid(page));
 
                 BUG_ON(!PageAnon(page_tail));
                 BUG_ON(!PageUptodate(page_tail));
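page_xchg_last_nid()/page_last_nid() record which node last faulted on a page, so copying the value here keeps the placement history intact when a THP is split into tail pages. A sketch of the accessors as they appear in this series, assuming the CONFIG_NUMA_BALANCING variant that stores the nid in a dedicated page field rather than packing it into page->flags:

/* Sketch of the accessors (one CONFIG_NUMA_BALANCING variant, assumed). */
static inline int page_xchg_last_nid(struct page *page, int nid)
{
        return xchg(&page->_last_nid, nid);
}

static inline int page_last_nid(struct page *page)
{
        return page->_last_nid;
}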
@@ -1632,6 +1720,8 @@ static int __split_huge_page_map(struct page *page,
                         BUG_ON(page_mapcount(page) != 1);
                         if (!pmd_young(*pmd))
                                 entry = pte_mkold(entry);
+                        if (pmd_numa(*pmd))
+                                entry = pte_mknuma(entry);
                         pte = pte_offset_map(&_pmd, haddr);
                         BUG_ON(!pte_none(*pte));
                         set_pte_at(mm, haddr, pte, entry);
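Propagating the NUMA bit at split time matters because pte_mknuma(), like pmd_mknuma(), makes the entry non-present so the next access faults; without this hunk, splitting a huge page that was awaiting a hinting fault would silently drop the pending hint. Roughly what the helper does, per the asm-generic form in this series (an assumption; architectures may override it):

/* Sketch of the generic helper (assumed; arch code may provide its own). */
static inline pte_t pte_mknuma(pte_t pte)
{
        pte = pte_set_flags(pte, _PAGE_NUMA);
        return pte_clear_flags(pte, _PAGE_PRESENT);
}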
@@ -1674,7 +1764,7 @@ static int __split_huge_page_map(struct page *page,
         return ret;
 }
 
-/* must be called with anon_vma->root->mutex hold */
+/* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
                               struct anon_vma *anon_vma)
 {
@@ -1729,7 +1819,7 @@ int split_huge_page(struct page *page)
 
         BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
         BUG_ON(!PageAnon(page));
-        anon_vma = page_lock_anon_vma(page);
+        anon_vma = page_lock_anon_vma_read(page);
         if (!anon_vma)
                 goto out;
         ret = 0;
@@ -1742,7 +1832,7 @@ int split_huge_page(struct page *page)
 
         BUG_ON(PageCompound(page));
 out_unlock:
-        page_unlock_anon_vma(anon_vma);
+        page_unlock_anon_vma_read(anon_vma);
 out:
         return ret;
 }
@@ -2234,7 +2324,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         if (pmd_trans_huge(*pmd))
                 goto out;
 
-        anon_vma_lock(vma->anon_vma);
+        anon_vma_lock_write(vma->anon_vma);
 
         pte = pte_offset_map(pmd, address);
         ptl = pte_lockptr(mm, pmd);
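The page_lock_anon_vma_read() and anon_vma_lock_write() renames reflect the anon_vma root lock changing from a mutex to an rw_semaphore in the same series: rmap walkers can now take it for read in parallel, while writers such as THP collapse and page splits take it exclusively. A sketch of the wrappers, per the v3.8-era include/linux/rmap.h (assumed):

/* Sketch of the lock wrappers after the mutex -> rwsem conversion. */
static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
        down_write(&anon_vma->root->rwsem);
}

static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
{
        down_read(&anon_vma->root->rwsem);
}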