Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	89
1 file changed, 49 insertions(+), 40 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fd3a07b3e6f4..08fc0ba2207e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
 	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
 static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
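This hunk drops the global split queue; the replacement state lives in struct pglist_data, so each NUMA node gets its own queue and lock. A sketch of the fields the companion include/linux/mmzone.h change is assumed to add (that change is not part of this diff):

	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		/* per-node deferred-split state (assumed companion change) */
		spinlock_t split_queue_lock;
		struct list_head split_queue;
		unsigned long split_queue_len;
	#endif

NODE_DATA(nid) then resolves the right instance, which the later hunks reach via page_to_nid() or sc->nid.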
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		return false;
 	entry = mk_pmd(zero_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
-	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	if (pgtable)
+		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
 	atomic_long_inc(&mm->nr_ptes);
 	return true;
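set_huge_zero_page() now tolerates a NULL pgtable because DAX mappings have no page table to deposit. The teardown path has to mirror the conditional deposit; a hedged sketch of what the matching withdraw side looks like (simplified, not part of this diff):

	if (vma_is_dax(vma)) {
		/* DAX: nothing was deposited, so nothing to withdraw */
	} else {
		pgtable_t pgtable = pgtable_trans_huge_withdraw(mm, pmd);
		pte_free(mm, pgtable);
		atomic_long_dec(&mm->nr_ptes);
	}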
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	spinlock_t *dst_ptl, *src_ptl;
 	struct page *src_page;
 	pmd_t pmd;
-	pgtable_t pgtable;
+	pgtable_t pgtable = NULL;
 	int ret;
 
-	ret = -ENOMEM;
-	pgtable = pte_alloc_one(dst_mm, addr);
-	if (unlikely(!pgtable))
-		goto out;
+	if (!vma_is_dax(vma)) {
+		ret = -ENOMEM;
+		pgtable = pte_alloc_one(dst_mm, addr);
+		if (unlikely(!pgtable))
+			goto out;
+	}
 
 	dst_ptl = pmd_lock(dst_mm, dst_pmd);
 	src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_unlock;
 	}
 
-	if (pmd_trans_huge(pmd)) {
+	if (!vma_is_dax(vma)) {
 		/* thp accounting separate from pmd_devmap accounting */
 		src_page = pmd_page(pmd);
 		VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
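Both copy_huge_pmd() hunks key off vma_is_dax(). For reference, a sketch of that helper as assumed here: it simply tests the DAX flag on the inode backing the mapping.

	static inline bool vma_is_dax(struct vm_area_struct *vma)
	{
		/* file-backed VMA whose inode is marked S_DAX */
		return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
	}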
@@ -3358,6 +3358,7 @@ int total_mapcount(struct page *page)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
 	struct anon_vma *anon_vma;
 	int count, mapcount, ret;
 	bool mlocked;
@@ -3401,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	lru_add_drain();
 
 	/* Prevent deferred_split_scan() touching ->_count */
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (!mapcount && count == 1) {
 		if (!list_empty(page_deferred_list(head))) {
-			split_queue_len--;
+			pgdata->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		__split_huge_page(page, list);
 		ret = 0;
 	} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		pr_alert("total_mapcount: %u, page_count(): %u\n",
 				mapcount, count);
 		if (PageTail(page))
@@ -3421,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		dump_page(page, "total_mapcount(head) > 0");
 		BUG();
 	} else {
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		unfreeze_page(anon_vma, head);
 		ret = -EBUSY;
 	}
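page_deferred_list(), used throughout these hunks, keeps the list head in the second tail page; the list_entry((void *)pos, struct page, mapping) calls below only work because the head overlays ->mapping there. A sketch consistent with that usage (assumption, simplified):

	static inline struct list_head *page_deferred_list(struct page *page)
	{
		/*
		 * ->lru of the tail pages is occupied by compound_head(), so
		 * reuse ->mapping + ->index of the second tail page instead.
		 */
		return (struct list_head *)&page[2].mapping;
	}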
@@ -3436,64 +3437,65 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 	unsigned long flags;
 
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	if (!list_empty(page_deferred_list(page))) {
-		split_queue_len--;
+		pgdata->split_queue_len--;
 		list_del(page_deferred_list(page));
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 	free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	if (list_empty(page_deferred_list(page))) {
-		list_add_tail(page_deferred_list(page), &split_queue);
-		split_queue_len++;
+		list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+		pgdata->split_queue_len++;
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
-	/*
-	 * Split a page from split_queue will free up at least one page,
-	 * at most HPAGE_PMD_NR - 1. We don't track exact number.
-	 * Let's use HPAGE_PMD_NR / 2 as ballpark.
-	 */
-	return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+	struct pglist_data *pgdata = NODE_DATA(sc->nid);
+	return ACCESS_ONCE(pgdata->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
+	struct pglist_data *pgdata = NODE_DATA(sc->nid);
 	unsigned long flags;
 	LIST_HEAD(list), *pos, *next;
 	struct page *page;
 	int split = 0;
 
-	spin_lock_irqsave(&split_queue_lock, flags);
-	list_splice_init(&split_queue, &list);
-
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	/* Take pin on all head pages to avoid freeing them under us */
-	list_for_each_safe(pos, next, &list) {
+	list_for_each_safe(pos, next, &pgdata->split_queue) {
 		page = list_entry((void *)pos, struct page, mapping);
 		page = compound_head(page);
-		/* race with put_compound_page() */
-		if (!get_page_unless_zero(page)) {
+		if (get_page_unless_zero(page)) {
+			list_move(page_deferred_list(page), &list);
+		} else {
+			/* We lost race with put_compound_page() */
 			list_del_init(page_deferred_list(page));
-			split_queue_len--;
+			pgdata->split_queue_len--;
 		}
+		if (!--sc->nr_to_scan)
+			break;
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
 	list_for_each_safe(pos, next, &list) {
 		page = list_entry((void *)pos, struct page, mapping);
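The scan above walks the per-node queue under its lock, pins each still-live page, and moves it to a private list so the actual splitting can run with the lock dropped. The pinning primitive as of this kernel (simplified for reference):

	static inline int get_page_unless_zero(struct page *page)
	{
		/* take a reference only if ->_count has not already hit zero */
		return atomic_inc_not_zero(&page->_count);
	}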
@@ -3505,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 		put_page(page);
 	}
 
-	spin_lock_irqsave(&split_queue_lock, flags);
-	list_splice_tail(&list, &split_queue);
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+	list_splice_tail(&list, &pgdata->split_queue);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
-	return split * HPAGE_PMD_NR / 2;
+	/*
+	 * Stop shrinker if we didn't split any page, but the queue is empty.
+	 * This can happen if pages were freed under us.
+	 */
+	if (!split && list_empty(&pgdata->split_queue))
+		return SHRINK_STOP;
+	return split;
 }
 
 static struct shrinker deferred_split_shrinker = {
 	.count_objects = deferred_split_count,
 	.scan_objects = deferred_split_scan,
 	.seeks = DEFAULT_SEEKS,
+	.flags = SHRINKER_NUMA_AWARE,
 };
 
 #ifdef CONFIG_DEBUG_FS
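With SHRINKER_NUMA_AWARE set, the shrinker core invokes count_objects/scan_objects once per node and passes the node id in sc->nid, which deferred_split_count()/_scan() translate back with NODE_DATA(). Registration itself is unchanged and presumably still happens from hugepage_init(); a minimal sketch (hypothetical function name):

	static int __init deferred_split_init(void)
	{
		/* wires the count/scan callbacks into memory-pressure reclaim */
		return register_shrinker(&deferred_split_shrinker);
	}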