 fs/proc/task_mmu.c      | 13
 include/linux/huge_mm.h | 14
 mm/huge_memory.c        | 40
 mm/memcontrol.c         | 10
 4 files changed, 46 insertions(+), 31 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8faaebdc6b02..42b5cf5d0326 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
-		spin_unlock(&walk->mm->page_table_lock);
+		spin_unlock(ptl);
 		mss->anonymous_thp += HPAGE_PMD_SIZE;
 		return 0;
 	}
@@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
 	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
+	spinlock_t *ptl;
 	pte_t *pte;
 	int err = 0;
 	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		int pmd_flags2;
 
 		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			if (err)
 				break;
 		}
-		spin_unlock(&walk->mm->page_table_lock);
+		spin_unlock(ptl);
 		return err;
 	}
 
@@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 
 	md = walk->private;
 
-	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
 		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
@@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		if (page)
 			gather_stats(page, md, pte_dirty(huge_pte),
 					HPAGE_PMD_SIZE/PAGE_SIZE);
-		spin_unlock(&walk->mm->page_table_lock);
+		spin_unlock(ptl);
 		return 0;
 	}
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3935428c57cf..4aca0d8da112 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    unsigned long start,
 				    unsigned long end,
 				    long adjust_next);
-extern int __pmd_trans_huge_lock(pmd_t *pmd,
-				 struct vm_area_struct *vma);
+extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+				 spinlock_t **ptl);
 /* mmap_sem must be held on entry */
-static inline int pmd_trans_huge_lock(pmd_t *pmd,
-				      struct vm_area_struct *vma)
+static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+				      spinlock_t **ptl)
 {
 	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
 	if (pmd_trans_huge(*pmd))
-		return __pmd_trans_huge_lock(pmd, vma);
+		return __pmd_trans_huge_lock(pmd, vma, ptl);
 	else
 		return 0;
 }
@@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 long adjust_next)
 {
 }
-static inline int pmd_trans_huge_lock(pmd_t *pmd,
-				      struct vm_area_struct *vma)
+static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+				      spinlock_t **ptl)
 {
 	return 0;
 }
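
For readers following the signature change above, here is a minimal caller sketch under the new convention; the function example_pmd_walk is illustrative only and not part of this patch. The lock actually taken is returned through the output parameter, and the caller must release that lock rather than the per-mm page_table_lock:

	/* Illustrative caller, not from this patch. */
	static int example_pmd_walk(pmd_t *pmd, struct vm_area_struct *vma)
	{
		spinlock_t *ptl;

		/* returns 1 with *ptl held iff *pmd is a stable huge pmd */
		if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
			/* operate on the huge pmd under ptl */
			spin_unlock(ptl);	/* unlock what was taken */
			return 0;
		}
		/* 0: not huge, handle at pte level; -1: a split raced */
		return 0;
	}
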
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e5b2d316be2e..471eb04066ff 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1376,9 +1376,10 @@ out:
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
+	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		struct page *page;
 		pgtable_t pgtable;
 		pmd_t orig_pmd;
@@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
 		if (is_huge_zero_pmd(orig_pmd)) {
 			atomic_long_dec(&tlb->mm->nr_ptes);
-			spin_unlock(&tlb->mm->page_table_lock);
+			spin_unlock(ptl);
 			put_huge_zero_page();
 		} else {
 			page = pmd_page(orig_pmd);
@@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
 			VM_BUG_ON(!PageHead(page));
 			atomic_long_dec(&tlb->mm->nr_ptes);
-			spin_unlock(&tlb->mm->page_table_lock);
+			spin_unlock(ptl);
 			tlb_remove_page(tlb, page);
 		}
 		pte_free(tlb->mm, pgtable);
@@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
 		unsigned char *vec)
 {
+	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		/*
 		 * All logical pages in the range are present
 		 * if backed by a huge page.
 		 */
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 		memset(vec, 1, (end - addr) >> PAGE_SHIFT);
 		ret = 1;
 	}
@@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 			  unsigned long new_addr, unsigned long old_end,
 			  pmd_t *old_pmd, pmd_t *new_pmd)
 {
+	spinlock_t *old_ptl, *new_ptl;
 	int ret = 0;
 	pmd_t pmd;
 
@@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		goto out;
 	}
 
-	ret = __pmd_trans_huge_lock(old_pmd, vma);
+	/*
+	 * We don't have to worry about the ordering of src and dst
+	 * ptlocks because exclusive mmap_sem prevents deadlock.
+	 */
+	ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
 	if (ret == 1) {
+		new_ptl = pmd_lockptr(mm, new_pmd);
+		if (new_ptl != old_ptl)
+			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-		spin_unlock(&mm->page_table_lock);
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
+		spin_unlock(old_ptl);
 	}
 out:
 	return ret;
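
The mremap path above is the only conversion site that can hold two pmd ptlocks at once. A condensed sketch of the ordering, assuming split PMD ptlocks so that old_pmd and new_pmd may map to different spinlocks of the same lock class (in the patch itself the source lock is taken via __pmd_trans_huge_lock(), not pmd_lock() directly):

	/* source lock first; destination nested only if distinct */
	old_ptl = pmd_lock(mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	/* ... move the pmd entry from old to new ... */
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

The spin_lock_nested() annotation is what keeps lockdep from flagging two held locks of the same class; taking them in a fixed order is safe here because mremap() holds mmap_sem exclusively, so no other path can acquire the pair in the opposite order.
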
@@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
 	int ret = 0;
 
-	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		pmd_t entry;
 		ret = 1;
 		if (!prot_numa) {
@@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		if (ret == HPAGE_PMD_NR)
 			set_pmd_at(mm, addr, pmd, entry);
 
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 	}
 
 	return ret;
@@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  * Note that if it returns 1, this routine returns without unlocking page
  * table locks. So callers must unlock them.
  */
-int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+			  spinlock_t **ptl)
 {
-	spin_lock(&vma->vm_mm->page_table_lock);
+	*ptl = pmd_lock(vma->vm_mm, pmd);
 	if (likely(pmd_trans_huge(*pmd))) {
 		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(&vma->vm_mm->page_table_lock);
+			spin_unlock(*ptl);
 			wait_split_huge_page(vma->anon_vma, pmd);
 			return -1;
 		} else {
@@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 			return 1;
 		}
 	}
-	spin_unlock(*ptl);
+	spin_unlock(*ptl);
 	return 0;
 }
 
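
The heart of the conversion is pmd_lock(), introduced earlier in this series. A simplified sketch of what it resolves to (see include/linux/mm.h; ptlock allocation and the exact config guards are omitted here): with split PMD ptlocks the lock is embedded in the struct page of the pmd page table, and otherwise it falls back to the old per-mm lock, which is what keeps this patch correct on both configurations:

	/* Simplified sketch; not a verbatim copy of include/linux/mm.h. */
	static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
	{
	#if USE_SPLIT_PMD_PTLOCKS
		return ptlock_ptr(pmd_to_page(pmd));	/* per-table lock */
	#else
		return &mm->page_table_lock;		/* legacy per-mm lock */
	#endif
	}

	static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
	{
		spinlock_t *ptl = pmd_lockptr(mm, pmd);
		spin_lock(ptl);
		return ptl;
	}
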
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e3cd40b2d5d9..f1a0ae6e11b8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
 			mc.precharge += HPAGE_PMD_NR;
-		spin_unlock(&vma->vm_mm->page_table_lock);
+		spin_unlock(ptl);
 		return 0;
 	}
 
@@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 	 * to be unlocked in __split_huge_page_splitting(), where the main
 	 * part of thp split is not executed yet.
 	 */
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		if (mc.precharge < HPAGE_PMD_NR) {
-			spin_unlock(&vma->vm_mm->page_table_lock);
+			spin_unlock(ptl);
 			return 0;
 		}
 		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
@@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 		}
 		put_page(page);
 	}
-	spin_unlock(&vma->vm_mm->page_table_lock);
+	spin_unlock(ptl);
 	return 0;
 }
 