aboutsummaryrefslogtreecommitdiffstats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2014-02-11 22:43:38 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-02-16 19:19:36 -0500
commit56eecdb912b536a4fa97fb5bfe5a940a54d79be6 (patch)
treeb42119ab8f9e2a49298f08ba29f78f348a3b579c /mm/huge_memory.c
parent9d85d5863fa4818eb7fa306563bf830c0210c3a6 (diff)
mm: Use ptep/pmdp_set_numa() for updating _PAGE_NUMA bit
Archs like ppc64 doesn't do tlb flush in set_pte/pmd functions when using a hash table MMU for various reasons (the flush is handled as part of the PTE modification when necessary). ppc64 thus doesn't implement flush_tlb_range for hash based MMUs. Additionally ppc64 require the tlb flushing to be batched within ptl locks. The reason to do that is to ensure that the hash page table is in sync with linux page table. We track the hpte index in linux pte and if we clear them without flushing hash and drop the ptl lock, we can have another cpu update the pte and can end up with duplicate entry in the hash table, which is fatal. We also want to keep set_pte_at simpler by not requiring them to do hash flush for performance reason. We do that by assuming that set_pte_at() is never *ever* called on a PTE that is already valid. This was the case until the NUMA code went in which broke that assumption. Fix that by introducing a new pair of helpers to set _PAGE_NUMA in a way similar to ptep/pmdp_set_wrprotect(), with a generic implementation using set_pte_at() and a powerpc specific one using the appropriate mechanism needed to keep the hash table in sync. Acked-by: Mel Gorman <mgorman@suse.de> Reviewed-by: Rik van Riel <riel@redhat.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c9
1 files changed, 2 insertions, 7 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 82166bf974e1..da23eb96779f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1545,6 +1545,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
1545 entry = pmd_mknonnuma(entry); 1545 entry = pmd_mknonnuma(entry);
1546 entry = pmd_modify(entry, newprot); 1546 entry = pmd_modify(entry, newprot);
1547 ret = HPAGE_PMD_NR; 1547 ret = HPAGE_PMD_NR;
1548 set_pmd_at(mm, addr, pmd, entry);
1548 BUG_ON(pmd_write(entry)); 1549 BUG_ON(pmd_write(entry));
1549 } else { 1550 } else {
1550 struct page *page = pmd_page(*pmd); 1551 struct page *page = pmd_page(*pmd);
@@ -1557,16 +1558,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
1557 */ 1558 */
1558 if (!is_huge_zero_page(page) && 1559 if (!is_huge_zero_page(page) &&
1559 !pmd_numa(*pmd)) { 1560 !pmd_numa(*pmd)) {
1560 entry = *pmd; 1561 pmdp_set_numa(mm, addr, pmd);
1561 entry = pmd_mknuma(entry);
1562 ret = HPAGE_PMD_NR; 1562 ret = HPAGE_PMD_NR;
1563 } 1563 }
1564 } 1564 }
1565
1566 /* Set PMD if cleared earlier */
1567 if (ret == HPAGE_PMD_NR)
1568 set_pmd_at(mm, addr, pmd, entry);
1569
1570 spin_unlock(ptl); 1565 spin_unlock(ptl);
1571 } 1566 }
1572 1567