Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 89
1 file changed, 49 insertions(+), 40 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fd3a07b3e6f4..08fc0ba2207e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
 	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
 static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		return false;
 	entry = mk_pmd(zero_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
-	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	if (pgtable)
+		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, entry);
 	atomic_long_inc(&mm->nr_ptes);
 	return true;
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	spinlock_t *dst_ptl, *src_ptl;
 	struct page *src_page;
 	pmd_t pmd;
-	pgtable_t pgtable;
+	pgtable_t pgtable = NULL;
 	int ret;
 
-	ret = -ENOMEM;
-	pgtable = pte_alloc_one(dst_mm, addr);
-	if (unlikely(!pgtable))
-		goto out;
+	if (!vma_is_dax(vma)) {
+		ret = -ENOMEM;
+		pgtable = pte_alloc_one(dst_mm, addr);
+		if (unlikely(!pgtable))
+			goto out;
+	}
 
 	dst_ptl = pmd_lock(dst_mm, dst_pmd);
 	src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_unlock;
 	}
 
-	if (pmd_trans_huge(pmd)) {
+	if (!vma_is_dax(vma)) {
 		/* thp accounting separate from pmd_devmap accounting */
 		src_page = pmd_page(pmd);
 		VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
@@ -3358,6 +3358,7 @@ int total_mapcount(struct page *page)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
 	struct anon_vma *anon_vma;
 	int count, mapcount, ret;
 	bool mlocked;
@@ -3401,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	lru_add_drain();
 
 	/* Prevent deferred_split_scan() touching ->_count */
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (!mapcount && count == 1) {
 		if (!list_empty(page_deferred_list(head))) {
-			split_queue_len--;
+			pgdata->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		__split_huge_page(page, list);
 		ret = 0;
 	} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		pr_alert("total_mapcount: %u, page_count(): %u\n",
 			 mapcount, count);
 		if (PageTail(page))
@@ -3421,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		dump_page(page, "total_mapcount(head) > 0");
 		BUG();
 	} else {
-		spin_unlock_irqrestore(&split_queue_lock, flags);
+		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 		unfreeze_page(anon_vma, head);
 		ret = -EBUSY;
 	}
@@ -3436,64 +3437,65 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 	unsigned long flags;
 
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	if (!list_empty(page_deferred_list(page))) {
-		split_queue_len--;
+		pgdata->split_queue_len--;
 		list_del(page_deferred_list(page));
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 	free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
+	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-	spin_lock_irqsave(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	if (list_empty(page_deferred_list(page))) {
-		list_add_tail(page_deferred_list(page), &split_queue);
-		split_queue_len++;
+		list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+		pgdata->split_queue_len++;
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
-	/*
-	 * Split a page from split_queue will free up at least one page,
-	 * at most HPAGE_PMD_NR - 1. We don't track exact number.
-	 * Let's use HPAGE_PMD_NR / 2 as ballpark.
-	 */
-	return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+	struct pglist_data *pgdata = NODE_DATA(sc->nid);
+	return ACCESS_ONCE(pgdata->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
+	struct pglist_data *pgdata = NODE_DATA(sc->nid);
 	unsigned long flags;
 	LIST_HEAD(list), *pos, *next;
 	struct page *page;
 	int split = 0;
 
-	spin_lock_irqsave(&split_queue_lock, flags);
-	list_splice_init(&split_queue, &list);
-
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
 	/* Take pin on all head pages to avoid freeing them under us */
-	list_for_each_safe(pos, next, &list) {
+	list_for_each_safe(pos, next, &pgdata->split_queue) {
 		page = list_entry((void *)pos, struct page, mapping);
 		page = compound_head(page);
-		/* race with put_compound_page() */
-		if (!get_page_unless_zero(page)) {
+		if (get_page_unless_zero(page)) {
+			list_move(page_deferred_list(page), &list);
+		} else {
+			/* We lost race with put_compound_page() */
 			list_del_init(page_deferred_list(page));
-			split_queue_len--;
+			pgdata->split_queue_len--;
 		}
+		if (!--sc->nr_to_scan)
+			break;
 	}
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
 	list_for_each_safe(pos, next, &list) {
 		page = list_entry((void *)pos, struct page, mapping);
@@ -3505,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 		put_page(page);
 	}
 
-	spin_lock_irqsave(&split_queue_lock, flags);
-	list_splice_tail(&list, &split_queue);
-	spin_unlock_irqrestore(&split_queue_lock, flags);
+	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+	list_splice_tail(&list, &pgdata->split_queue);
+	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
 
-	return split * HPAGE_PMD_NR / 2;
+	/*
+	 * Stop shrinker if we didn't split any page, but the queue is empty.
+	 * This can happen if pages were freed under us.
+	 */
+	if (!split && list_empty(&pgdata->split_queue))
+		return SHRINK_STOP;
+	return split;
 }
 
 static struct shrinker deferred_split_shrinker = {
 	.count_objects = deferred_split_count,
 	.scan_objects = deferred_split_scan,
 	.seeks = DEFAULT_SEEKS,
+	.flags = SHRINKER_NUMA_AWARE,
 };
 
 #ifdef CONFIG_DEBUG_FS
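
Note on the per-node queue: this diff is limited to mm/huge_memory.c, so the declarations behind pgdata->split_queue_lock, pgdata->split_queue and pgdata->split_queue_len are not shown here. A minimal sketch of what the rest of the change is assumed to look like, with the fields added to struct pglist_data (include/linux/mmzone.h) and initialised once per node; the field placement and init site are assumptions, not part of this diff:

	/* Assumed addition to struct pglist_data -- not shown in this diff */
	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		spinlock_t split_queue_lock;
		struct list_head split_queue;
		unsigned long split_queue_len;
	#endif

	/* Assumed one-time init for each node's pgdat, e.g. during node setup */
	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		spin_lock_init(&pgdat->split_queue_lock);
		INIT_LIST_HEAD(&pgdat->split_queue);
		pgdat->split_queue_len = 0;
	#endif

With SHRINKER_NUMA_AWARE set on deferred_split_shrinker, the shrinker core calls count_objects/scan_objects per node and passes the node id in sc->nid, which is why deferred_split_count() and deferred_split_scan() can look up their queue via NODE_DATA(sc->nid).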