Diffstat (limited to 'mm')

 -rw-r--r--  mm/Kconfig            |  9
 -rw-r--r--  mm/backing-dev.c      |  4
 -rw-r--r--  mm/cleancache.c       |  4
 -rw-r--r--  mm/filemap.c          |  1
 -rw-r--r--  mm/gup.c              |  4
 -rw-r--r--  mm/huge_memory.c      | 89
 -rw-r--r--  mm/hugetlb.c          |  7
 -rw-r--r--  mm/internal.h         | 31
 -rw-r--r--  mm/memblock.c         |  2
 -rw-r--r--  mm/memory.c           | 14
 -rw-r--r--  mm/mempolicy.c        | 14
 -rw-r--r--  mm/mmap.c             | 85
 -rw-r--r--  mm/mprotect.c         |  6
 -rw-r--r--  mm/mremap.c           |  2
 -rw-r--r--  mm/page_alloc.c       |  7
 -rw-r--r--  mm/pgtable-generic.c  |  8
 -rw-r--r--  mm/util.c             | 27
 -rw-r--r--  mm/vmpressure.c       |  3
 -rw-r--r--  mm/vmscan.c           |  2
 -rw-r--r--  mm/vmstat.c           | 70

20 files changed, 222 insertions, 167 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 97a4e06b15c0..03cbfa072f42 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
| @@ -624,7 +624,7 @@ config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT | |||
| 624 | bool | 624 | bool |
| 625 | 625 | ||
| 626 | config DEFERRED_STRUCT_PAGE_INIT | 626 | config DEFERRED_STRUCT_PAGE_INIT |
| 627 | bool "Defer initialisation of struct pages to kswapd" | 627 | bool "Defer initialisation of struct pages to kthreads" |
| 628 | default n | 628 | default n |
| 629 | depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT | 629 | depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT |
| 630 | depends on MEMORY_HOTPLUG | 630 | depends on MEMORY_HOTPLUG |
| @@ -633,9 +633,10 @@ config DEFERRED_STRUCT_PAGE_INIT | |||
| 633 | single thread. On very large machines this can take a considerable | 633 | single thread. On very large machines this can take a considerable |
| 634 | amount of time. If this option is set, large machines will bring up | 634 | amount of time. If this option is set, large machines will bring up |
| 635 | a subset of memmap at boot and then initialise the rest in parallel | 635 | a subset of memmap at boot and then initialise the rest in parallel |
| 636 | when kswapd starts. This has a potential performance impact on | 636 | by starting one-off "pgdatinitX" kernel thread for each node X. This |
| 637 | processes running early in the lifetime of the systemm until kswapd | 637 | has a potential performance impact on processes running early in the |
| 638 | finishes the initialisation. | 638 | lifetime of the system until these kthreads finish the |
| 639 | initialisation. | ||
| 639 | 640 | ||
| 640 | config IDLE_PAGE_TRACKING | 641 | config IDLE_PAGE_TRACKING |
| 641 | bool "Enable idle page tracking" | 642 | bool "Enable idle page tracking" |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index cc5d29d2da9b..c554d173a65f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
| @@ -328,7 +328,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, | |||
| 328 | return 0; | 328 | return 0; |
| 329 | 329 | ||
| 330 | out_destroy_stat: | 330 | out_destroy_stat: |
| 331 | while (--i) | 331 | while (i--) |
| 332 | percpu_counter_destroy(&wb->stat[i]); | 332 | percpu_counter_destroy(&wb->stat[i]); |
| 333 | fprop_local_destroy_percpu(&wb->completions); | 333 | fprop_local_destroy_percpu(&wb->completions); |
| 334 | out_put_cong: | 334 | out_put_cong: |
| @@ -989,7 +989,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout) | |||
| 989 | * here rather than calling cond_resched(). | 989 | * here rather than calling cond_resched(). |
| 990 | */ | 990 | */ |
| 991 | if (current->flags & PF_WQ_WORKER) | 991 | if (current->flags & PF_WQ_WORKER) |
| 992 | schedule_timeout(1); | 992 | schedule_timeout_uninterruptible(1); |
| 993 | else | 993 | else |
| 994 | cond_resched(); | 994 | cond_resched(); |
| 995 | 995 | ||
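The wb_init() error path above switches from a pre-decrement to a post-decrement rollback loop, so that stat counter 0 is also destroyed and a failure on the very first counter does not step the index below zero. A minimal userspace sketch of the two loop shapes, with a toy destroy() standing in for percpu_counter_destroy() (illustrative names only, not kernel API):

#include <stdio.h>

static void destroy(int idx)
{
    printf("destroy[%d]\n", idx);
}

/* Old shape: "while (--i)" stops once i reaches 0, so entry 0 leaks;
 * if i is already 0, the first decrement yields -1 and the loop walks
 * off the front of the array. */
static void rollback_predec(int i)
{
    while (--i)
        destroy(i);
}

/* Fixed shape: "while (i--)" destroys entries i-1 down to 0 and does
 * nothing at all when i == 0. */
static void rollback_postdec(int i)
{
    while (i--)
        destroy(i);
}

int main(void)
{
    puts("pre-decrement rollback of 3 entries:");
    rollback_predec(3);     /* prints 2 and 1 -- entry 0 is leaked */
    puts("post-decrement rollback of 3 entries:");
    rollback_postdec(3);    /* prints 2, 1 and 0 */
    return 0;
}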
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 8fc50811119b..ba5d8f3e6d68 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
| @@ -22,7 +22,7 @@ | |||
| 22 | * cleancache_ops is set by cleancache_register_ops to contain the pointers | 22 | * cleancache_ops is set by cleancache_register_ops to contain the pointers |
| 23 | * to the cleancache "backend" implementation functions. | 23 | * to the cleancache "backend" implementation functions. |
| 24 | */ | 24 | */ |
| 25 | static struct cleancache_ops *cleancache_ops __read_mostly; | 25 | static const struct cleancache_ops *cleancache_ops __read_mostly; |
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * Counters available via /sys/kernel/debug/cleancache (if debugfs is | 28 | * Counters available via /sys/kernel/debug/cleancache (if debugfs is |
| @@ -49,7 +49,7 @@ static void cleancache_register_ops_sb(struct super_block *sb, void *unused) | |||
| 49 | /* | 49 | /* |
| 50 | * Register operations for cleancache. Returns 0 on success. | 50 | * Register operations for cleancache. Returns 0 on success. |
| 51 | */ | 51 | */ |
| 52 | int cleancache_register_ops(struct cleancache_ops *ops) | 52 | int cleancache_register_ops(const struct cleancache_ops *ops) |
| 53 | { | 53 | { |
| 54 | if (cmpxchg(&cleancache_ops, NULL, ops)) | 54 | if (cmpxchg(&cleancache_ops, NULL, ops)) |
| 55 | return -EBUSY; | 55 | return -EBUSY; |
diff --git a/mm/filemap.c b/mm/filemap.c
index bc943867d68c..23edccecadb0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
| @@ -1890,6 +1890,7 @@ EXPORT_SYMBOL(generic_file_read_iter); | |||
| 1890 | * page_cache_read - adds requested page to the page cache if not already there | 1890 | * page_cache_read - adds requested page to the page cache if not already there |
| 1891 | * @file: file to read | 1891 | * @file: file to read |
| 1892 | * @offset: page index | 1892 | * @offset: page index |
| 1893 | * @gfp_mask: memory allocation flags | ||
| 1893 | * | 1894 | * |
| 1894 | * This adds the requested page to the page cache if it isn't already there, | 1895 | * This adds the requested page to the page cache if it isn't already there, |
| 1895 | * and schedules an I/O to read in its contents from disk. | 1896 | * and schedules an I/O to read in its contents from disk. |
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
| @@ -430,10 +430,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) | |||
| 430 | * Anon pages in shared mappings are surprising: now | 430 | * Anon pages in shared mappings are surprising: now |
| 431 | * just reject it. | 431 | * just reject it. |
| 432 | */ | 432 | */ |
| 433 | if (!is_cow_mapping(vm_flags)) { | 433 | if (!is_cow_mapping(vm_flags)) |
| 434 | WARN_ON_ONCE(vm_flags & VM_MAYWRITE); | ||
| 435 | return -EFAULT; | 434 | return -EFAULT; |
| 436 | } | ||
| 437 | } | 435 | } |
| 438 | } else if (!(vm_flags & VM_READ)) { | 436 | } else if (!(vm_flags & VM_READ)) { |
| 439 | if (!(gup_flags & FOLL_FORCE)) | 437 | if (!(gup_flags & FOLL_FORCE)) |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fd3a07b3e6f4..08fc0ba2207e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
| @@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = { | |||
| 138 | .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), | 138 | .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), |
| 139 | }; | 139 | }; |
| 140 | 140 | ||
| 141 | static DEFINE_SPINLOCK(split_queue_lock); | ||
| 142 | static LIST_HEAD(split_queue); | ||
| 143 | static unsigned long split_queue_len; | ||
| 144 | static struct shrinker deferred_split_shrinker; | 141 | static struct shrinker deferred_split_shrinker; |
| 145 | 142 | ||
| 146 | static void set_recommended_min_free_kbytes(void) | 143 | static void set_recommended_min_free_kbytes(void) |
| @@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, | |||
| 861 | return false; | 858 | return false; |
| 862 | entry = mk_pmd(zero_page, vma->vm_page_prot); | 859 | entry = mk_pmd(zero_page, vma->vm_page_prot); |
| 863 | entry = pmd_mkhuge(entry); | 860 | entry = pmd_mkhuge(entry); |
| 864 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | 861 | if (pgtable) |
| 862 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | ||
| 865 | set_pmd_at(mm, haddr, pmd, entry); | 863 | set_pmd_at(mm, haddr, pmd, entry); |
| 866 | atomic_long_inc(&mm->nr_ptes); | 864 | atomic_long_inc(&mm->nr_ptes); |
| 867 | return true; | 865 | return true; |
| @@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
| 1039 | spinlock_t *dst_ptl, *src_ptl; | 1037 | spinlock_t *dst_ptl, *src_ptl; |
| 1040 | struct page *src_page; | 1038 | struct page *src_page; |
| 1041 | pmd_t pmd; | 1039 | pmd_t pmd; |
| 1042 | pgtable_t pgtable; | 1040 | pgtable_t pgtable = NULL; |
| 1043 | int ret; | 1041 | int ret; |
| 1044 | 1042 | ||
| 1045 | ret = -ENOMEM; | 1043 | if (!vma_is_dax(vma)) { |
| 1046 | pgtable = pte_alloc_one(dst_mm, addr); | 1044 | ret = -ENOMEM; |
| 1047 | if (unlikely(!pgtable)) | 1045 | pgtable = pte_alloc_one(dst_mm, addr); |
| 1048 | goto out; | 1046 | if (unlikely(!pgtable)) |
| 1047 | goto out; | ||
| 1048 | } | ||
| 1049 | 1049 | ||
| 1050 | dst_ptl = pmd_lock(dst_mm, dst_pmd); | 1050 | dst_ptl = pmd_lock(dst_mm, dst_pmd); |
| 1051 | src_ptl = pmd_lockptr(src_mm, src_pmd); | 1051 | src_ptl = pmd_lockptr(src_mm, src_pmd); |
| @@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
| 1076 | goto out_unlock; | 1076 | goto out_unlock; |
| 1077 | } | 1077 | } |
| 1078 | 1078 | ||
| 1079 | if (pmd_trans_huge(pmd)) { | 1079 | if (!vma_is_dax(vma)) { |
| 1080 | /* thp accounting separate from pmd_devmap accounting */ | 1080 | /* thp accounting separate from pmd_devmap accounting */ |
| 1081 | src_page = pmd_page(pmd); | 1081 | src_page = pmd_page(pmd); |
| 1082 | VM_BUG_ON_PAGE(!PageHead(src_page), src_page); | 1082 | VM_BUG_ON_PAGE(!PageHead(src_page), src_page); |
| @@ -3358,6 +3358,7 @@ int total_mapcount(struct page *page) | |||
| 3358 | int split_huge_page_to_list(struct page *page, struct list_head *list) | 3358 | int split_huge_page_to_list(struct page *page, struct list_head *list) |
| 3359 | { | 3359 | { |
| 3360 | struct page *head = compound_head(page); | 3360 | struct page *head = compound_head(page); |
| 3361 | struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); | ||
| 3361 | struct anon_vma *anon_vma; | 3362 | struct anon_vma *anon_vma; |
| 3362 | int count, mapcount, ret; | 3363 | int count, mapcount, ret; |
| 3363 | bool mlocked; | 3364 | bool mlocked; |
| @@ -3401,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) | |||
| 3401 | lru_add_drain(); | 3402 | lru_add_drain(); |
| 3402 | 3403 | ||
| 3403 | /* Prevent deferred_split_scan() touching ->_count */ | 3404 | /* Prevent deferred_split_scan() touching ->_count */ |
| 3404 | spin_lock_irqsave(&split_queue_lock, flags); | 3405 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
| 3405 | count = page_count(head); | 3406 | count = page_count(head); |
| 3406 | mapcount = total_mapcount(head); | 3407 | mapcount = total_mapcount(head); |
| 3407 | if (!mapcount && count == 1) { | 3408 | if (!mapcount && count == 1) { |
| 3408 | if (!list_empty(page_deferred_list(head))) { | 3409 | if (!list_empty(page_deferred_list(head))) { |
| 3409 | split_queue_len--; | 3410 | pgdata->split_queue_len--; |
| 3410 | list_del(page_deferred_list(head)); | 3411 | list_del(page_deferred_list(head)); |
| 3411 | } | 3412 | } |
| 3412 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3413 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3413 | __split_huge_page(page, list); | 3414 | __split_huge_page(page, list); |
| 3414 | ret = 0; | 3415 | ret = 0; |
| 3415 | } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { | 3416 | } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { |
| 3416 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3417 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3417 | pr_alert("total_mapcount: %u, page_count(): %u\n", | 3418 | pr_alert("total_mapcount: %u, page_count(): %u\n", |
| 3418 | mapcount, count); | 3419 | mapcount, count); |
| 3419 | if (PageTail(page)) | 3420 | if (PageTail(page)) |
| @@ -3421,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) | |||
| 3421 | dump_page(page, "total_mapcount(head) > 0"); | 3422 | dump_page(page, "total_mapcount(head) > 0"); |
| 3422 | BUG(); | 3423 | BUG(); |
| 3423 | } else { | 3424 | } else { |
| 3424 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3425 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3425 | unfreeze_page(anon_vma, head); | 3426 | unfreeze_page(anon_vma, head); |
| 3426 | ret = -EBUSY; | 3427 | ret = -EBUSY; |
| 3427 | } | 3428 | } |
| @@ -3436,64 +3437,65 @@ out: | |||
| 3436 | 3437 | ||
| 3437 | void free_transhuge_page(struct page *page) | 3438 | void free_transhuge_page(struct page *page) |
| 3438 | { | 3439 | { |
| 3440 | struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); | ||
| 3439 | unsigned long flags; | 3441 | unsigned long flags; |
| 3440 | 3442 | ||
| 3441 | spin_lock_irqsave(&split_queue_lock, flags); | 3443 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
| 3442 | if (!list_empty(page_deferred_list(page))) { | 3444 | if (!list_empty(page_deferred_list(page))) { |
| 3443 | split_queue_len--; | 3445 | pgdata->split_queue_len--; |
| 3444 | list_del(page_deferred_list(page)); | 3446 | list_del(page_deferred_list(page)); |
| 3445 | } | 3447 | } |
| 3446 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3448 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3447 | free_compound_page(page); | 3449 | free_compound_page(page); |
| 3448 | } | 3450 | } |
| 3449 | 3451 | ||
| 3450 | void deferred_split_huge_page(struct page *page) | 3452 | void deferred_split_huge_page(struct page *page) |
| 3451 | { | 3453 | { |
| 3454 | struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); | ||
| 3452 | unsigned long flags; | 3455 | unsigned long flags; |
| 3453 | 3456 | ||
| 3454 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | 3457 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
| 3455 | 3458 | ||
| 3456 | spin_lock_irqsave(&split_queue_lock, flags); | 3459 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
| 3457 | if (list_empty(page_deferred_list(page))) { | 3460 | if (list_empty(page_deferred_list(page))) { |
| 3458 | list_add_tail(page_deferred_list(page), &split_queue); | 3461 | list_add_tail(page_deferred_list(page), &pgdata->split_queue); |
| 3459 | split_queue_len++; | 3462 | pgdata->split_queue_len++; |
| 3460 | } | 3463 | } |
| 3461 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3464 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3462 | } | 3465 | } |
| 3463 | 3466 | ||
| 3464 | static unsigned long deferred_split_count(struct shrinker *shrink, | 3467 | static unsigned long deferred_split_count(struct shrinker *shrink, |
| 3465 | struct shrink_control *sc) | 3468 | struct shrink_control *sc) |
| 3466 | { | 3469 | { |
| 3467 | /* | 3470 | struct pglist_data *pgdata = NODE_DATA(sc->nid); |
| 3468 | * Split a page from split_queue will free up at least one page, | 3471 | return ACCESS_ONCE(pgdata->split_queue_len); |
| 3469 | * at most HPAGE_PMD_NR - 1. We don't track exact number. | ||
| 3470 | * Let's use HPAGE_PMD_NR / 2 as ballpark. | ||
| 3471 | */ | ||
| 3472 | return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2; | ||
| 3473 | } | 3472 | } |
| 3474 | 3473 | ||
| 3475 | static unsigned long deferred_split_scan(struct shrinker *shrink, | 3474 | static unsigned long deferred_split_scan(struct shrinker *shrink, |
| 3476 | struct shrink_control *sc) | 3475 | struct shrink_control *sc) |
| 3477 | { | 3476 | { |
| 3477 | struct pglist_data *pgdata = NODE_DATA(sc->nid); | ||
| 3478 | unsigned long flags; | 3478 | unsigned long flags; |
| 3479 | LIST_HEAD(list), *pos, *next; | 3479 | LIST_HEAD(list), *pos, *next; |
| 3480 | struct page *page; | 3480 | struct page *page; |
| 3481 | int split = 0; | 3481 | int split = 0; |
| 3482 | 3482 | ||
| 3483 | spin_lock_irqsave(&split_queue_lock, flags); | 3483 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
| 3484 | list_splice_init(&split_queue, &list); | ||
| 3485 | |||
| 3486 | /* Take pin on all head pages to avoid freeing them under us */ | 3484 | /* Take pin on all head pages to avoid freeing them under us */ |
| 3487 | list_for_each_safe(pos, next, &list) { | 3485 | list_for_each_safe(pos, next, &pgdata->split_queue) { |
| 3488 | page = list_entry((void *)pos, struct page, mapping); | 3486 | page = list_entry((void *)pos, struct page, mapping); |
| 3489 | page = compound_head(page); | 3487 | page = compound_head(page); |
| 3490 | /* race with put_compound_page() */ | 3488 | if (get_page_unless_zero(page)) { |
| 3491 | if (!get_page_unless_zero(page)) { | 3489 | list_move(page_deferred_list(page), &list); |
| 3490 | } else { | ||
| 3491 | /* We lost race with put_compound_page() */ | ||
| 3492 | list_del_init(page_deferred_list(page)); | 3492 | list_del_init(page_deferred_list(page)); |
| 3493 | split_queue_len--; | 3493 | pgdata->split_queue_len--; |
| 3494 | } | 3494 | } |
| 3495 | if (!--sc->nr_to_scan) | ||
| 3496 | break; | ||
| 3495 | } | 3497 | } |
| 3496 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3498 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3497 | 3499 | ||
| 3498 | list_for_each_safe(pos, next, &list) { | 3500 | list_for_each_safe(pos, next, &list) { |
| 3499 | page = list_entry((void *)pos, struct page, mapping); | 3501 | page = list_entry((void *)pos, struct page, mapping); |
| @@ -3505,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, | |||
| 3505 | put_page(page); | 3507 | put_page(page); |
| 3506 | } | 3508 | } |
| 3507 | 3509 | ||
| 3508 | spin_lock_irqsave(&split_queue_lock, flags); | 3510 | spin_lock_irqsave(&pgdata->split_queue_lock, flags); |
| 3509 | list_splice_tail(&list, &split_queue); | 3511 | list_splice_tail(&list, &pgdata->split_queue); |
| 3510 | spin_unlock_irqrestore(&split_queue_lock, flags); | 3512 | spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); |
| 3511 | 3513 | ||
| 3512 | return split * HPAGE_PMD_NR / 2; | 3514 | /* |
| 3515 | * Stop shrinker if we didn't split any page, but the queue is empty. | ||
| 3516 | * This can happen if pages were freed under us. | ||
| 3517 | */ | ||
| 3518 | if (!split && list_empty(&pgdata->split_queue)) | ||
| 3519 | return SHRINK_STOP; | ||
| 3520 | return split; | ||
| 3513 | } | 3521 | } |
| 3514 | 3522 | ||
| 3515 | static struct shrinker deferred_split_shrinker = { | 3523 | static struct shrinker deferred_split_shrinker = { |
| 3516 | .count_objects = deferred_split_count, | 3524 | .count_objects = deferred_split_count, |
| 3517 | .scan_objects = deferred_split_scan, | 3525 | .scan_objects = deferred_split_scan, |
| 3518 | .seeks = DEFAULT_SEEKS, | 3526 | .seeks = DEFAULT_SEEKS, |
| 3527 | .flags = SHRINKER_NUMA_AWARE, | ||
| 3519 | }; | 3528 | }; |
| 3520 | 3529 | ||
| 3521 | #ifdef CONFIG_DEBUG_FS | 3530 | #ifdef CONFIG_DEBUG_FS |
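The mm/huge_memory.c changes above replace the single global deferred-split queue with per-node state (split_queue_lock, split_queue and split_queue_len move into struct pglist_data, initialised in free_area_init_core() further down in this diff) and mark the shrinker SHRINKER_NUMA_AWARE so count_objects()/scan_objects() only look at sc->nid. A toy userspace model of that layout, using a fixed array in place of the page list and a pthread mutex in place of the spinlock; all names and sizes here are illustrative, not kernel code:

#include <pthread.h>
#include <stdio.h>

#define MAX_NODES 2
#define QUEUE_CAP 16

struct node_data {                      /* analogue of the new pglist_data fields */
    pthread_mutex_t split_queue_lock;
    int split_queue[QUEUE_CAP];         /* stand-in for the list of deferred THPs */
    unsigned long split_queue_len;
};

static struct node_data nodes[MAX_NODES];

static void deferred_split_page(int nid, int page)
{
    struct node_data *n = &nodes[nid];

    pthread_mutex_lock(&n->split_queue_lock);
    if (n->split_queue_len < QUEUE_CAP)
        n->split_queue[n->split_queue_len++] = page;
    pthread_mutex_unlock(&n->split_queue_lock);
}

/* count_objects(): report only this node's backlog */
static unsigned long deferred_split_count(int nid)
{
    return nodes[nid].split_queue_len;
}

/* scan_objects(): drain up to nr_to_scan entries from this node's queue */
static unsigned long deferred_split_scan(int nid, unsigned long nr_to_scan)
{
    struct node_data *n = &nodes[nid];
    unsigned long split = 0;

    pthread_mutex_lock(&n->split_queue_lock);
    while (n->split_queue_len && nr_to_scan--) {
        n->split_queue_len--;           /* a real scan splits the page here */
        split++;
    }
    pthread_mutex_unlock(&n->split_queue_lock);
    return split;
}

int main(void)
{
    unsigned long drained;
    int nid;

    for (nid = 0; nid < MAX_NODES; nid++)
        pthread_mutex_init(&nodes[nid].split_queue_lock, NULL);

    deferred_split_page(0, 100);
    deferred_split_page(0, 101);
    deferred_split_page(1, 200);

    printf("backlog: node0=%lu node1=%lu\n",
           deferred_split_count(0), deferred_split_count(1));

    drained = deferred_split_scan(0, 8);
    printf("scanned node0: drained=%lu left=%lu, node1 still=%lu\n",
           drained, deferred_split_count(0), deferred_split_count(1));
    return 0;
}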
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 12908dcf5831..06ae13e869d0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
| @@ -1001,7 +1001,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) | |||
| 1001 | ((node = hstate_next_node_to_free(hs, mask)) || 1); \ | 1001 | ((node = hstate_next_node_to_free(hs, mask)) || 1); \ |
| 1002 | nr_nodes--) | 1002 | nr_nodes--) |
| 1003 | 1003 | ||
| 1004 | #if defined(CONFIG_CMA) && defined(CONFIG_X86_64) | 1004 | #if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)) |
| 1005 | static void destroy_compound_gigantic_page(struct page *page, | 1005 | static void destroy_compound_gigantic_page(struct page *page, |
| 1006 | unsigned int order) | 1006 | unsigned int order) |
| 1007 | { | 1007 | { |
| @@ -1214,8 +1214,8 @@ void free_huge_page(struct page *page) | |||
| 1214 | 1214 | ||
| 1215 | set_page_private(page, 0); | 1215 | set_page_private(page, 0); |
| 1216 | page->mapping = NULL; | 1216 | page->mapping = NULL; |
| 1217 | BUG_ON(page_count(page)); | 1217 | VM_BUG_ON_PAGE(page_count(page), page); |
| 1218 | BUG_ON(page_mapcount(page)); | 1218 | VM_BUG_ON_PAGE(page_mapcount(page), page); |
| 1219 | restore_reserve = PagePrivate(page); | 1219 | restore_reserve = PagePrivate(page); |
| 1220 | ClearPagePrivate(page); | 1220 | ClearPagePrivate(page); |
| 1221 | 1221 | ||
| @@ -1286,6 +1286,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order) | |||
| 1286 | set_page_count(p, 0); | 1286 | set_page_count(p, 0); |
| 1287 | set_compound_head(p, page); | 1287 | set_compound_head(p, page); |
| 1288 | } | 1288 | } |
| 1289 | atomic_set(compound_mapcount_ptr(page), -1); | ||
| 1289 | } | 1290 | } |
| 1290 | 1291 | ||
| 1291 | /* | 1292 | /* |
diff --git a/mm/internal.h b/mm/internal.h
index ed8b5ffcf9b1..a38a21ebddb4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
| @@ -216,6 +216,37 @@ static inline bool is_cow_mapping(vm_flags_t flags) | |||
| 216 | return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; | 216 | return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | /* | ||
| 220 | * These three helpers classifies VMAs for virtual memory accounting. | ||
| 221 | */ | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Executable code area - executable, not writable, not stack | ||
| 225 | */ | ||
| 226 | static inline bool is_exec_mapping(vm_flags_t flags) | ||
| 227 | { | ||
| 228 | return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC; | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * Stack area - atomatically grows in one direction | ||
| 233 | * | ||
| 234 | * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: | ||
| 235 | * do_mmap() forbids all other combinations. | ||
| 236 | */ | ||
| 237 | static inline bool is_stack_mapping(vm_flags_t flags) | ||
| 238 | { | ||
| 239 | return (flags & VM_STACK) == VM_STACK; | ||
| 240 | } | ||
| 241 | |||
| 242 | /* | ||
| 243 | * Data area - private, writable, not stack | ||
| 244 | */ | ||
| 245 | static inline bool is_data_mapping(vm_flags_t flags) | ||
| 246 | { | ||
| 247 | return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; | ||
| 248 | } | ||
| 249 | |||
| 219 | /* mm/util.c */ | 250 | /* mm/util.c */ |
| 220 | void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, | 251 | void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, |
| 221 | struct vm_area_struct *prev, struct rb_node *rb_parent); | 252 | struct vm_area_struct *prev, struct rb_node *rb_parent); |
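The three helpers added to mm/internal.h above classify a VMA purely by its flag bits, and the mm/mmap.c hunks later in this diff use them for the exec/stack/data counters. A standalone sketch with made-up flag values (the real VM_* constants come from <linux/mm.h>; VM_STACK is assumed here to be the downward-growing direction):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long vm_flags_t;

/* Illustrative bit values only -- not the kernel's actual VM_* constants. */
#define VM_READ      0x01UL
#define VM_WRITE     0x02UL
#define VM_EXEC      0x04UL
#define VM_SHARED    0x08UL
#define VM_GROWSDOWN 0x10UL
#define VM_STACK     VM_GROWSDOWN   /* VM_GROWSUP on stacks that grow upward */

static bool is_exec_mapping(vm_flags_t flags)   /* executable, not writable, not stack */
{
    return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

static bool is_stack_mapping(vm_flags_t flags)  /* growable stack area */
{
    return (flags & VM_STACK) == VM_STACK;
}

static bool is_data_mapping(vm_flags_t flags)   /* private, writable, not stack */
{
    return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}

static void classify(const char *what, vm_flags_t flags)
{
    printf("%-6s exec=%d stack=%d data=%d\n", what,
           is_exec_mapping(flags), is_stack_mapping(flags),
           is_data_mapping(flags));
}

int main(void)
{
    classify("text",  VM_READ | VM_EXEC);                  /* 1 0 0 */
    classify("heap",  VM_READ | VM_WRITE);                 /* 0 0 1 */
    classify("shmem", VM_READ | VM_WRITE | VM_SHARED);     /* 0 0 0 */
    classify("stack", VM_READ | VM_WRITE | VM_GROWSDOWN);  /* 0 1 0 */
    return 0;
}

The stack case is why VM_STACK sits in the data mask: a growable stack VMA is private and writable, but it must not be counted as data a second time.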
diff --git a/mm/memblock.c b/mm/memblock.c
index d2ed81e59a94..dd7989929f13 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
| @@ -1448,7 +1448,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) | |||
| 1448 | * Remaining API functions | 1448 | * Remaining API functions |
| 1449 | */ | 1449 | */ |
| 1450 | 1450 | ||
| 1451 | phys_addr_t __init memblock_phys_mem_size(void) | 1451 | phys_addr_t __init_memblock memblock_phys_mem_size(void) |
| 1452 | { | 1452 | { |
| 1453 | return memblock.memory.total_size; | 1453 | return memblock.memory.total_size; |
| 1454 | } | 1454 | } |
diff --git a/mm/memory.c b/mm/memory.c
index 5aa4f55eb786..38090ca37a08 100644
--- a/mm/memory.c
+++ b/mm/memory.c
| @@ -1612,10 +1612,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, | |||
| 1612 | * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP | 1612 | * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP |
| 1613 | * without pte special, it would there be refcounted as a normal page. | 1613 | * without pte special, it would there be refcounted as a normal page. |
| 1614 | */ | 1614 | */ |
| 1615 | if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) { | 1615 | if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { |
| 1616 | struct page *page; | 1616 | struct page *page; |
| 1617 | 1617 | ||
| 1618 | page = pfn_t_to_page(pfn); | 1618 | /* |
| 1619 | * At this point we are committed to insert_page() | ||
| 1620 | * regardless of whether the caller specified flags that | ||
| 1621 | * result in pfn_t_has_page() == false. | ||
| 1622 | */ | ||
| 1623 | page = pfn_to_page(pfn_t_to_pfn(pfn)); | ||
| 1619 | return insert_page(vma, addr, page, vma->vm_page_prot); | 1624 | return insert_page(vma, addr, page, vma->vm_page_prot); |
| 1620 | } | 1625 | } |
| 1621 | return insert_pfn(vma, addr, pfn, vma->vm_page_prot); | 1626 | return insert_pfn(vma, addr, pfn, vma->vm_page_prot); |
| @@ -2253,11 +2258,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 2253 | 2258 | ||
| 2254 | page_cache_get(old_page); | 2259 | page_cache_get(old_page); |
| 2255 | 2260 | ||
| 2256 | /* | ||
| 2257 | * Only catch write-faults on shared writable pages, | ||
| 2258 | * read-only shared pages can get COWed by | ||
| 2259 | * get_user_pages(.write=1, .force=1). | ||
| 2260 | */ | ||
| 2261 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { | 2261 | if (vma->vm_ops && vma->vm_ops->page_mkwrite) { |
| 2262 | int tmp; | 2262 | int tmp; |
| 2263 | 2263 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 27d135408a22..4c4187c0e1de 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
| @@ -548,8 +548,7 @@ retry: | |||
| 548 | goto retry; | 548 | goto retry; |
| 549 | } | 549 | } |
| 550 | 550 | ||
| 551 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) | 551 | migrate_page_add(page, qp->pagelist, flags); |
| 552 | migrate_page_add(page, qp->pagelist, flags); | ||
| 553 | } | 552 | } |
| 554 | pte_unmap_unlock(pte - 1, ptl); | 553 | pte_unmap_unlock(pte - 1, ptl); |
| 555 | cond_resched(); | 554 | cond_resched(); |
| @@ -625,7 +624,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, | |||
| 625 | unsigned long endvma = vma->vm_end; | 624 | unsigned long endvma = vma->vm_end; |
| 626 | unsigned long flags = qp->flags; | 625 | unsigned long flags = qp->flags; |
| 627 | 626 | ||
| 628 | if (vma->vm_flags & VM_PFNMAP) | 627 | if (!vma_migratable(vma)) |
| 629 | return 1; | 628 | return 1; |
| 630 | 629 | ||
| 631 | if (endvma > end) | 630 | if (endvma > end) |
| @@ -644,16 +643,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, | |||
| 644 | 643 | ||
| 645 | if (flags & MPOL_MF_LAZY) { | 644 | if (flags & MPOL_MF_LAZY) { |
| 646 | /* Similar to task_numa_work, skip inaccessible VMAs */ | 645 | /* Similar to task_numa_work, skip inaccessible VMAs */ |
| 647 | if (vma_migratable(vma) && | 646 | if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) |
| 648 | vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) | ||
| 649 | change_prot_numa(vma, start, endvma); | 647 | change_prot_numa(vma, start, endvma); |
| 650 | return 1; | 648 | return 1; |
| 651 | } | 649 | } |
| 652 | 650 | ||
| 653 | if ((flags & MPOL_MF_STRICT) || | 651 | /* queue pages from current vma */ |
| 654 | ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && | 652 | if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) |
| 655 | vma_migratable(vma))) | ||
| 656 | /* queue pages from current vma */ | ||
| 657 | return 0; | 653 | return 0; |
| 658 | return 1; | 654 | return 1; |
| 659 | } | 655 | } |
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
| @@ -42,6 +42,7 @@ | |||
| 42 | #include <linux/memory.h> | 42 | #include <linux/memory.h> |
| 43 | #include <linux/printk.h> | 43 | #include <linux/printk.h> |
| 44 | #include <linux/userfaultfd_k.h> | 44 | #include <linux/userfaultfd_k.h> |
| 45 | #include <linux/moduleparam.h> | ||
| 45 | 46 | ||
| 46 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
| 47 | #include <asm/cacheflush.h> | 48 | #include <asm/cacheflush.h> |
| @@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX; | |||
| 69 | int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; | 70 | int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS; |
| 70 | #endif | 71 | #endif |
| 71 | 72 | ||
| 73 | static bool ignore_rlimit_data = true; | ||
| 74 | core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644); | ||
| 72 | 75 | ||
| 73 | static void unmap_region(struct mm_struct *mm, | 76 | static void unmap_region(struct mm_struct *mm, |
| 74 | struct vm_area_struct *vma, struct vm_area_struct *prev, | 77 | struct vm_area_struct *vma, struct vm_area_struct *prev, |
| @@ -387,8 +390,9 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma) | |||
| 387 | } | 390 | } |
| 388 | 391 | ||
| 389 | #ifdef CONFIG_DEBUG_VM_RB | 392 | #ifdef CONFIG_DEBUG_VM_RB |
| 390 | static int browse_rb(struct rb_root *root) | 393 | static int browse_rb(struct mm_struct *mm) |
| 391 | { | 394 | { |
| 395 | struct rb_root *root = &mm->mm_rb; | ||
| 392 | int i = 0, j, bug = 0; | 396 | int i = 0, j, bug = 0; |
| 393 | struct rb_node *nd, *pn = NULL; | 397 | struct rb_node *nd, *pn = NULL; |
| 394 | unsigned long prev = 0, pend = 0; | 398 | unsigned long prev = 0, pend = 0; |
| @@ -411,12 +415,14 @@ static int browse_rb(struct rb_root *root) | |||
| 411 | vma->vm_start, vma->vm_end); | 415 | vma->vm_start, vma->vm_end); |
| 412 | bug = 1; | 416 | bug = 1; |
| 413 | } | 417 | } |
| 418 | spin_lock(&mm->page_table_lock); | ||
| 414 | if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) { | 419 | if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) { |
| 415 | pr_emerg("free gap %lx, correct %lx\n", | 420 | pr_emerg("free gap %lx, correct %lx\n", |
| 416 | vma->rb_subtree_gap, | 421 | vma->rb_subtree_gap, |
| 417 | vma_compute_subtree_gap(vma)); | 422 | vma_compute_subtree_gap(vma)); |
| 418 | bug = 1; | 423 | bug = 1; |
| 419 | } | 424 | } |
| 425 | spin_unlock(&mm->page_table_lock); | ||
| 420 | i++; | 426 | i++; |
| 421 | pn = nd; | 427 | pn = nd; |
| 422 | prev = vma->vm_start; | 428 | prev = vma->vm_start; |
| @@ -453,12 +459,16 @@ static void validate_mm(struct mm_struct *mm) | |||
| 453 | struct vm_area_struct *vma = mm->mmap; | 459 | struct vm_area_struct *vma = mm->mmap; |
| 454 | 460 | ||
| 455 | while (vma) { | 461 | while (vma) { |
| 462 | struct anon_vma *anon_vma = vma->anon_vma; | ||
| 456 | struct anon_vma_chain *avc; | 463 | struct anon_vma_chain *avc; |
| 457 | 464 | ||
| 458 | vma_lock_anon_vma(vma); | 465 | if (anon_vma) { |
| 459 | list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) | 466 | anon_vma_lock_read(anon_vma); |
| 460 | anon_vma_interval_tree_verify(avc); | 467 | list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) |
| 461 | vma_unlock_anon_vma(vma); | 468 | anon_vma_interval_tree_verify(avc); |
| 469 | anon_vma_unlock_read(anon_vma); | ||
| 470 | } | ||
| 471 | |||
| 462 | highest_address = vma->vm_end; | 472 | highest_address = vma->vm_end; |
| 463 | vma = vma->vm_next; | 473 | vma = vma->vm_next; |
| 464 | i++; | 474 | i++; |
| @@ -472,7 +482,7 @@ static void validate_mm(struct mm_struct *mm) | |||
| 472 | mm->highest_vm_end, highest_address); | 482 | mm->highest_vm_end, highest_address); |
| 473 | bug = 1; | 483 | bug = 1; |
| 474 | } | 484 | } |
| 475 | i = browse_rb(&mm->mm_rb); | 485 | i = browse_rb(mm); |
| 476 | if (i != mm->map_count) { | 486 | if (i != mm->map_count) { |
| 477 | if (i != -1) | 487 | if (i != -1) |
| 478 | pr_emerg("map_count %d rb %d\n", mm->map_count, i); | 488 | pr_emerg("map_count %d rb %d\n", mm->map_count, i); |
| @@ -2139,32 +2149,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns | |||
| 2139 | int expand_upwards(struct vm_area_struct *vma, unsigned long address) | 2149 | int expand_upwards(struct vm_area_struct *vma, unsigned long address) |
| 2140 | { | 2150 | { |
| 2141 | struct mm_struct *mm = vma->vm_mm; | 2151 | struct mm_struct *mm = vma->vm_mm; |
| 2142 | int error; | 2152 | int error = 0; |
| 2143 | 2153 | ||
| 2144 | if (!(vma->vm_flags & VM_GROWSUP)) | 2154 | if (!(vma->vm_flags & VM_GROWSUP)) |
| 2145 | return -EFAULT; | 2155 | return -EFAULT; |
| 2146 | 2156 | ||
| 2147 | /* | 2157 | /* Guard against wrapping around to address 0. */ |
| 2148 | * We must make sure the anon_vma is allocated | 2158 | if (address < PAGE_ALIGN(address+4)) |
| 2149 | * so that the anon_vma locking is not a noop. | 2159 | address = PAGE_ALIGN(address+4); |
| 2150 | */ | 2160 | else |
| 2161 | return -ENOMEM; | ||
| 2162 | |||
| 2163 | /* We must make sure the anon_vma is allocated. */ | ||
| 2151 | if (unlikely(anon_vma_prepare(vma))) | 2164 | if (unlikely(anon_vma_prepare(vma))) |
| 2152 | return -ENOMEM; | 2165 | return -ENOMEM; |
| 2153 | vma_lock_anon_vma(vma); | ||
| 2154 | 2166 | ||
| 2155 | /* | 2167 | /* |
| 2156 | * vma->vm_start/vm_end cannot change under us because the caller | 2168 | * vma->vm_start/vm_end cannot change under us because the caller |
| 2157 | * is required to hold the mmap_sem in read mode. We need the | 2169 | * is required to hold the mmap_sem in read mode. We need the |
| 2158 | * anon_vma lock to serialize against concurrent expand_stacks. | 2170 | * anon_vma lock to serialize against concurrent expand_stacks. |
| 2159 | * Also guard against wrapping around to address 0. | ||
| 2160 | */ | 2171 | */ |
| 2161 | if (address < PAGE_ALIGN(address+4)) | 2172 | anon_vma_lock_write(vma->anon_vma); |
| 2162 | address = PAGE_ALIGN(address+4); | ||
| 2163 | else { | ||
| 2164 | vma_unlock_anon_vma(vma); | ||
| 2165 | return -ENOMEM; | ||
| 2166 | } | ||
| 2167 | error = 0; | ||
| 2168 | 2173 | ||
| 2169 | /* Somebody else might have raced and expanded it already */ | 2174 | /* Somebody else might have raced and expanded it already */ |
| 2170 | if (address > vma->vm_end) { | 2175 | if (address > vma->vm_end) { |
| @@ -2182,7 +2187,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) | |||
| 2182 | * updates, but we only hold a shared mmap_sem | 2187 | * updates, but we only hold a shared mmap_sem |
| 2183 | * lock here, so we need to protect against | 2188 | * lock here, so we need to protect against |
| 2184 | * concurrent vma expansions. | 2189 | * concurrent vma expansions. |
| 2185 | * vma_lock_anon_vma() doesn't help here, as | 2190 | * anon_vma_lock_write() doesn't help here, as |
| 2186 | * we don't guarantee that all growable vmas | 2191 | * we don't guarantee that all growable vmas |
| 2187 | * in a mm share the same root anon vma. | 2192 | * in a mm share the same root anon vma. |
| 2188 | * So, we reuse mm->page_table_lock to guard | 2193 | * So, we reuse mm->page_table_lock to guard |
| @@ -2205,7 +2210,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) | |||
| 2205 | } | 2210 | } |
| 2206 | } | 2211 | } |
| 2207 | } | 2212 | } |
| 2208 | vma_unlock_anon_vma(vma); | 2213 | anon_vma_unlock_write(vma->anon_vma); |
| 2209 | khugepaged_enter_vma_merge(vma, vma->vm_flags); | 2214 | khugepaged_enter_vma_merge(vma, vma->vm_flags); |
| 2210 | validate_mm(mm); | 2215 | validate_mm(mm); |
| 2211 | return error; | 2216 | return error; |
| @@ -2221,25 +2226,21 @@ int expand_downwards(struct vm_area_struct *vma, | |||
| 2221 | struct mm_struct *mm = vma->vm_mm; | 2226 | struct mm_struct *mm = vma->vm_mm; |
| 2222 | int error; | 2227 | int error; |
| 2223 | 2228 | ||
| 2224 | /* | ||
| 2225 | * We must make sure the anon_vma is allocated | ||
| 2226 | * so that the anon_vma locking is not a noop. | ||
| 2227 | */ | ||
| 2228 | if (unlikely(anon_vma_prepare(vma))) | ||
| 2229 | return -ENOMEM; | ||
| 2230 | |||
| 2231 | address &= PAGE_MASK; | 2229 | address &= PAGE_MASK; |
| 2232 | error = security_mmap_addr(address); | 2230 | error = security_mmap_addr(address); |
| 2233 | if (error) | 2231 | if (error) |
| 2234 | return error; | 2232 | return error; |
| 2235 | 2233 | ||
| 2236 | vma_lock_anon_vma(vma); | 2234 | /* We must make sure the anon_vma is allocated. */ |
| 2235 | if (unlikely(anon_vma_prepare(vma))) | ||
| 2236 | return -ENOMEM; | ||
| 2237 | 2237 | ||
| 2238 | /* | 2238 | /* |
| 2239 | * vma->vm_start/vm_end cannot change under us because the caller | 2239 | * vma->vm_start/vm_end cannot change under us because the caller |
| 2240 | * is required to hold the mmap_sem in read mode. We need the | 2240 | * is required to hold the mmap_sem in read mode. We need the |
| 2241 | * anon_vma lock to serialize against concurrent expand_stacks. | 2241 | * anon_vma lock to serialize against concurrent expand_stacks. |
| 2242 | */ | 2242 | */ |
| 2243 | anon_vma_lock_write(vma->anon_vma); | ||
| 2243 | 2244 | ||
| 2244 | /* Somebody else might have raced and expanded it already */ | 2245 | /* Somebody else might have raced and expanded it already */ |
| 2245 | if (address < vma->vm_start) { | 2246 | if (address < vma->vm_start) { |
| @@ -2257,7 +2258,7 @@ int expand_downwards(struct vm_area_struct *vma, | |||
| 2257 | * updates, but we only hold a shared mmap_sem | 2258 | * updates, but we only hold a shared mmap_sem |
| 2258 | * lock here, so we need to protect against | 2259 | * lock here, so we need to protect against |
| 2259 | * concurrent vma expansions. | 2260 | * concurrent vma expansions. |
| 2260 | * vma_lock_anon_vma() doesn't help here, as | 2261 | * anon_vma_lock_write() doesn't help here, as |
| 2261 | * we don't guarantee that all growable vmas | 2262 | * we don't guarantee that all growable vmas |
| 2262 | * in a mm share the same root anon vma. | 2263 | * in a mm share the same root anon vma. |
| 2263 | * So, we reuse mm->page_table_lock to guard | 2264 | * So, we reuse mm->page_table_lock to guard |
| @@ -2278,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma, | |||
| 2278 | } | 2279 | } |
| 2279 | } | 2280 | } |
| 2280 | } | 2281 | } |
| 2281 | vma_unlock_anon_vma(vma); | 2282 | anon_vma_unlock_write(vma->anon_vma); |
| 2282 | khugepaged_enter_vma_merge(vma, vma->vm_flags); | 2283 | khugepaged_enter_vma_merge(vma, vma->vm_flags); |
| 2283 | validate_mm(mm); | 2284 | validate_mm(mm); |
| 2284 | return error; | 2285 | return error; |
| @@ -2982,9 +2983,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) | |||
| 2982 | if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) | 2983 | if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) |
| 2983 | return false; | 2984 | return false; |
| 2984 | 2985 | ||
| 2985 | if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS & | 2986 | if (is_data_mapping(flags) && |
| 2986 | (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE) | 2987 | mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) { |
| 2987 | return mm->data_vm + npages <= rlimit(RLIMIT_DATA); | 2988 | if (ignore_rlimit_data) |
| 2989 | pr_warn_once("%s (%d): VmData %lu exceed data ulimit " | ||
| 2990 | "%lu. Will be forbidden soon.\n", | ||
| 2991 | current->comm, current->pid, | ||
| 2992 | (mm->data_vm + npages) << PAGE_SHIFT, | ||
| 2993 | rlimit(RLIMIT_DATA)); | ||
| 2994 | else | ||
| 2995 | return false; | ||
| 2996 | } | ||
| 2988 | 2997 | ||
| 2989 | return true; | 2998 | return true; |
| 2990 | } | 2999 | } |
| @@ -2993,11 +3002,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) | |||
| 2993 | { | 3002 | { |
| 2994 | mm->total_vm += npages; | 3003 | mm->total_vm += npages; |
| 2995 | 3004 | ||
| 2996 | if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC) | 3005 | if (is_exec_mapping(flags)) |
| 2997 | mm->exec_vm += npages; | 3006 | mm->exec_vm += npages; |
| 2998 | else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN))) | 3007 | else if (is_stack_mapping(flags)) |
| 2999 | mm->stack_vm += npages; | 3008 | mm->stack_vm += npages; |
| 3000 | else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | 3009 | else if (is_data_mapping(flags)) |
| 3001 | mm->data_vm += npages; | 3010 | mm->data_vm += npages; |
| 3002 | } | 3011 | } |
| 3003 | 3012 | ||
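With the mm/mmap.c hunks above, may_expand_vm() now applies RLIMIT_DATA only to data mappings and, while the new ignore_rlimit_data parameter stays at its default of true, merely warns once instead of refusing, so existing workloads keep running while the future hard limit is announced. A rough userspace sketch of that "warn now, enforce later" shape; the names below loosely mirror the patch and are not kernel API:

#include <stdbool.h>
#include <stdio.h>

static bool ignore_rlimit_data = true;  /* analogue of the new core_param() knob */

static bool may_expand_data(unsigned long current_pages,
                            unsigned long extra_pages,
                            unsigned long limit_pages)
{
    static bool warned;

    if (current_pages + extra_pages <= limit_pages)
        return true;                    /* within RLIMIT_DATA: always fine */

    if (!ignore_rlimit_data)
        return false;                   /* strict mode: refuse the mapping */

    if (!warned) {                      /* soft mode: allow, but complain once */
        fprintf(stderr,
                "VmData %lu pages exceeds data ulimit %lu pages. "
                "Will be forbidden soon.\n",
                current_pages + extra_pages, limit_pages);
        warned = true;
    }
    return true;
}

int main(void)
{
    printf("%d\n", may_expand_data(100, 10, 200));  /* 1: under the limit       */
    printf("%d\n", may_expand_data(195, 10, 200));  /* 1, plus a single warning */
    printf("%d\n", may_expand_data(195, 10, 200));  /* 1, warning already shown */
    return 0;
}

Per the diff, the kernel version emits the warning via pr_warn_once() and exposes ignore_rlimit_data as a runtime-writable parameter (mode 0644), so enforcement can be switched on without rebuilding.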
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8eb7bb40dc40..f7cb3d4d9c2e 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
| @@ -160,9 +160,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, | |||
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { | 162 | if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { |
| 163 | if (next - addr != HPAGE_PMD_SIZE) | 163 | if (next - addr != HPAGE_PMD_SIZE) { |
| 164 | split_huge_pmd(vma, pmd, addr); | 164 | split_huge_pmd(vma, pmd, addr); |
| 165 | else { | 165 | if (pmd_none(*pmd)) |
| 166 | continue; | ||
| 167 | } else { | ||
| 166 | int nr_ptes = change_huge_pmd(vma, pmd, addr, | 168 | int nr_ptes = change_huge_pmd(vma, pmd, addr, |
| 167 | newprot, prot_numa); | 169 | newprot, prot_numa); |
| 168 | 170 | ||
diff --git a/mm/mremap.c b/mm/mremap.c
index d77946a997f7..8eeba02fc991 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
| @@ -210,6 +210,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, | |||
| 210 | } | 210 | } |
| 211 | } | 211 | } |
| 212 | split_huge_pmd(vma, old_pmd, old_addr); | 212 | split_huge_pmd(vma, old_pmd, old_addr); |
| 213 | if (pmd_none(*old_pmd)) | ||
| 214 | continue; | ||
| 213 | VM_BUG_ON(pmd_trans_huge(*old_pmd)); | 215 | VM_BUG_ON(pmd_trans_huge(*old_pmd)); |
| 214 | } | 216 | } |
| 215 | if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma, | 217 | if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma, |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 63358d9f9aa9..838ca8bb64f7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
| @@ -5210,6 +5210,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
| 5210 | pgdat->numabalancing_migrate_nr_pages = 0; | 5210 | pgdat->numabalancing_migrate_nr_pages = 0; |
| 5211 | pgdat->numabalancing_migrate_next_window = jiffies; | 5211 | pgdat->numabalancing_migrate_next_window = jiffies; |
| 5212 | #endif | 5212 | #endif |
| 5213 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 5214 | spin_lock_init(&pgdat->split_queue_lock); | ||
| 5215 | INIT_LIST_HEAD(&pgdat->split_queue); | ||
| 5216 | pgdat->split_queue_len = 0; | ||
| 5217 | #endif | ||
| 5213 | init_waitqueue_head(&pgdat->kswapd_wait); | 5218 | init_waitqueue_head(&pgdat->kswapd_wait); |
| 5214 | init_waitqueue_head(&pgdat->pfmemalloc_wait); | 5219 | init_waitqueue_head(&pgdat->pfmemalloc_wait); |
| 5215 | pgdat_page_ext_init(pgdat); | 5220 | pgdat_page_ext_init(pgdat); |
| @@ -6615,7 +6620,7 @@ bool is_pageblock_removable_nolock(struct page *page) | |||
| 6615 | return !has_unmovable_pages(zone, page, 0, true); | 6620 | return !has_unmovable_pages(zone, page, 0, true); |
| 6616 | } | 6621 | } |
| 6617 | 6622 | ||
| 6618 | #ifdef CONFIG_CMA | 6623 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) |
| 6619 | 6624 | ||
| 6620 | static unsigned long pfn_max_align_down(unsigned long pfn) | 6625 | static unsigned long pfn_max_align_down(unsigned long pfn) |
| 6621 | { | 6626 | { |
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 9d4767698a1c..06a005b979a7 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
| @@ -90,9 +90,9 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, | |||
| 90 | * ARCHes with special requirements for evicting THP backing TLB entries can | 90 | * ARCHes with special requirements for evicting THP backing TLB entries can |
| 91 | * implement this. Otherwise also, it can help optimize normal TLB flush in | 91 | * implement this. Otherwise also, it can help optimize normal TLB flush in |
| 92 | * THP regime. stock flush_tlb_range() typically has optimization to nuke the | 92 | * THP regime. stock flush_tlb_range() typically has optimization to nuke the |
| 93 | * entire TLB TLB if flush span is greater than a threshhold, which will | 93 | * entire TLB if flush span is greater than a threshold, which will |
| 94 | * likely be true for a single huge page. Thus a single thp flush will | 94 | * likely be true for a single huge page. Thus a single thp flush will |
| 95 | * invalidate the entire TLB which is not desitable. | 95 | * invalidate the entire TLB which is not desirable. |
| 96 | * e.g. see arch/arc: flush_pmd_tlb_range | 96 | * e.g. see arch/arc: flush_pmd_tlb_range |
| 97 | */ | 97 | */ |
| 98 | #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) | 98 | #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) |
| @@ -195,7 +195,9 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, | |||
| 195 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | 195 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); |
| 196 | VM_BUG_ON(pmd_trans_huge(*pmdp)); | 196 | VM_BUG_ON(pmd_trans_huge(*pmdp)); |
| 197 | pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); | 197 | pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); |
| 198 | flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | 198 | |
| 199 | /* collapse entails shooting down ptes not pmd */ | ||
| 200 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
| 199 | return pmd; | 201 | return pmd; |
| 200 | } | 202 | } |
| 201 | #endif | 203 | #endif |
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
| @@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | /* Check if the vma is being used as a stack by this task */ | 232 | /* Check if the vma is being used as a stack by this task */ |
| 233 | static int vm_is_stack_for_task(struct task_struct *t, | 233 | int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t) |
| 234 | struct vm_area_struct *vma) | ||
| 235 | { | 234 | { |
| 236 | return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); | 235 | return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); |
| 237 | } | 236 | } |
| 238 | 237 | ||
| 239 | /* | ||
| 240 | * Check if the vma is being used as a stack. | ||
| 241 | * If is_group is non-zero, check in the entire thread group or else | ||
| 242 | * just check in the current task. Returns the task_struct of the task | ||
| 243 | * that the vma is stack for. Must be called under rcu_read_lock(). | ||
| 244 | */ | ||
| 245 | struct task_struct *task_of_stack(struct task_struct *task, | ||
| 246 | struct vm_area_struct *vma, bool in_group) | ||
| 247 | { | ||
| 248 | if (vm_is_stack_for_task(task, vma)) | ||
| 249 | return task; | ||
| 250 | |||
| 251 | if (in_group) { | ||
| 252 | struct task_struct *t; | ||
| 253 | |||
| 254 | for_each_thread(task, t) { | ||
| 255 | if (vm_is_stack_for_task(t, vma)) | ||
| 256 | return t; | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | return NULL; | ||
| 261 | } | ||
| 262 | |||
| 263 | #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) | 238 | #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) |
| 264 | void arch_pick_mmap_layout(struct mm_struct *mm) | 239 | void arch_pick_mmap_layout(struct mm_struct *mm) |
| 265 | { | 240 | { |
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 9a6c0704211c..149fdf6c5c56 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
| @@ -248,9 +248,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, | |||
| 248 | 248 | ||
| 249 | if (tree) { | 249 | if (tree) { |
| 250 | spin_lock(&vmpr->sr_lock); | 250 | spin_lock(&vmpr->sr_lock); |
| 251 | vmpr->tree_scanned += scanned; | 251 | scanned = vmpr->tree_scanned += scanned; |
| 252 | vmpr->tree_reclaimed += reclaimed; | 252 | vmpr->tree_reclaimed += reclaimed; |
| 253 | scanned = vmpr->scanned; | ||
| 254 | spin_unlock(&vmpr->sr_lock); | 253 | spin_unlock(&vmpr->sr_lock); |
| 255 | 254 | ||
| 256 | if (scanned < vmpressure_win) | 255 | if (scanned < vmpressure_win) |
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eb3dd37ccd7c..71b1c29948db 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
| @@ -1443,7 +1443,7 @@ int isolate_lru_page(struct page *page) | |||
| 1443 | int ret = -EBUSY; | 1443 | int ret = -EBUSY; |
| 1444 | 1444 | ||
| 1445 | VM_BUG_ON_PAGE(!page_count(page), page); | 1445 | VM_BUG_ON_PAGE(!page_count(page), page); |
| 1446 | VM_BUG_ON_PAGE(PageTail(page), page); | 1446 | WARN_RATELIMIT(PageTail(page), "trying to isolate tail page"); |
| 1447 | 1447 | ||
| 1448 | if (PageLRU(page)) { | 1448 | if (PageLRU(page)) { |
| 1449 | struct zone *zone = page_zone(page); | 1449 | struct zone *zone = page_zone(page); |
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 40b2c74ddf16..084c6725b373 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
| @@ -1396,10 +1396,15 @@ static void vmstat_update(struct work_struct *w) | |||
| 1396 | * Counters were updated so we expect more updates | 1396 | * Counters were updated so we expect more updates |
| 1397 | * to occur in the future. Keep on running the | 1397 | * to occur in the future. Keep on running the |
| 1398 | * update worker thread. | 1398 | * update worker thread. |
| 1399 | * If we were marked on cpu_stat_off clear the flag | ||
| 1400 | * so that vmstat_shepherd doesn't schedule us again. | ||
| 1399 | */ | 1401 | */ |
| 1400 | queue_delayed_work_on(smp_processor_id(), vmstat_wq, | 1402 | if (!cpumask_test_and_clear_cpu(smp_processor_id(), |
| 1401 | this_cpu_ptr(&vmstat_work), | 1403 | cpu_stat_off)) { |
| 1402 | round_jiffies_relative(sysctl_stat_interval)); | 1404 | queue_delayed_work_on(smp_processor_id(), vmstat_wq, |
| 1405 | this_cpu_ptr(&vmstat_work), | ||
| 1406 | round_jiffies_relative(sysctl_stat_interval)); | ||
| 1407 | } | ||
| 1403 | } else { | 1408 | } else { |
| 1404 | /* | 1409 | /* |
| 1405 | * We did not update any counters so the app may be in | 1410 | * We did not update any counters so the app may be in |
| @@ -1417,18 +1422,6 @@ static void vmstat_update(struct work_struct *w) | |||
| 1417 | * until the diffs stay at zero. The function is used by NOHZ and can only be | 1422 | * until the diffs stay at zero. The function is used by NOHZ and can only be |
| 1418 | * invoked when tick processing is not active. | 1423 | * invoked when tick processing is not active. |
| 1419 | */ | 1424 | */ |
| 1420 | void quiet_vmstat(void) | ||
| 1421 | { | ||
| 1422 | if (system_state != SYSTEM_RUNNING) | ||
| 1423 | return; | ||
| 1424 | |||
| 1425 | do { | ||
| 1426 | if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off)) | ||
| 1427 | cancel_delayed_work(this_cpu_ptr(&vmstat_work)); | ||
| 1428 | |||
| 1429 | } while (refresh_cpu_vm_stats(false)); | ||
| 1430 | } | ||
| 1431 | |||
| 1432 | /* | 1425 | /* |
| 1433 | * Check if the diffs for a certain cpu indicate that | 1426 | * Check if the diffs for a certain cpu indicate that |
| 1434 | * an update is needed. | 1427 | * an update is needed. |
| @@ -1452,6 +1445,30 @@ static bool need_update(int cpu) | |||
| 1452 | return false; | 1445 | return false; |
| 1453 | } | 1446 | } |
| 1454 | 1447 | ||
| 1448 | void quiet_vmstat(void) | ||
| 1449 | { | ||
| 1450 | if (system_state != SYSTEM_RUNNING) | ||
| 1451 | return; | ||
| 1452 | |||
| 1453 | /* | ||
| 1454 | * If we are already in hands of the shepherd then there | ||
| 1455 | * is nothing for us to do here. | ||
| 1456 | */ | ||
| 1457 | if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off)) | ||
| 1458 | return; | ||
| 1459 | |||
| 1460 | if (!need_update(smp_processor_id())) | ||
| 1461 | return; | ||
| 1462 | |||
| 1463 | /* | ||
| 1464 | * Just refresh counters and do not care about the pending delayed | ||
| 1465 | * vmstat_update. It doesn't fire that often to matter and canceling | ||
| 1466 | * it would be too expensive from this path. | ||
| 1467 | * vmstat_shepherd will take care about that for us. | ||
| 1468 | */ | ||
| 1469 | refresh_cpu_vm_stats(false); | ||
| 1470 | } | ||
| 1471 | |||
| 1455 | 1472 | ||
| 1456 | /* | 1473 | /* |
| 1457 | * Shepherd worker thread that checks the | 1474 | * Shepherd worker thread that checks the |
| @@ -1469,18 +1486,25 @@ static void vmstat_shepherd(struct work_struct *w) | |||
| 1469 | 1486 | ||
| 1470 | get_online_cpus(); | 1487 | get_online_cpus(); |
| 1471 | /* Check processors whose vmstat worker threads have been disabled */ | 1488 | /* Check processors whose vmstat worker threads have been disabled */ |
| 1472 | for_each_cpu(cpu, cpu_stat_off) | 1489 | for_each_cpu(cpu, cpu_stat_off) { |
| 1473 | if (need_update(cpu) && | 1490 | struct delayed_work *dw = &per_cpu(vmstat_work, cpu); |
| 1474 | cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) | ||
| 1475 | |||
| 1476 | queue_delayed_work_on(cpu, vmstat_wq, | ||
| 1477 | &per_cpu(vmstat_work, cpu), 0); | ||
| 1478 | 1491 | ||
| 1492 | if (need_update(cpu)) { | ||
| 1493 | if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) | ||
| 1494 | queue_delayed_work_on(cpu, vmstat_wq, dw, 0); | ||
| 1495 | } else { | ||
| 1496 | /* | ||
| 1497 | * Cancel the work if quiet_vmstat has put this | ||
| 1498 | * cpu on cpu_stat_off because the work item might | ||
| 1499 | * be still scheduled | ||
| 1500 | */ | ||
| 1501 | cancel_delayed_work(dw); | ||
| 1502 | } | ||
| 1503 | } | ||
| 1479 | put_online_cpus(); | 1504 | put_online_cpus(); |
| 1480 | 1505 | ||
| 1481 | schedule_delayed_work(&shepherd, | 1506 | schedule_delayed_work(&shepherd, |
| 1482 | round_jiffies_relative(sysctl_stat_interval)); | 1507 | round_jiffies_relative(sysctl_stat_interval)); |
| 1483 | |||
| 1484 | } | 1508 | } |
| 1485 | 1509 | ||
| 1486 | static void __init start_shepherd_timer(void) | 1510 | static void __init start_shepherd_timer(void) |
| @@ -1488,7 +1512,7 @@ static void __init start_shepherd_timer(void) | |||
| 1488 | int cpu; | 1512 | int cpu; |
| 1489 | 1513 | ||
| 1490 | for_each_possible_cpu(cpu) | 1514 | for_each_possible_cpu(cpu) |
| 1491 | INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu), | 1515 | INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), |
| 1492 | vmstat_update); | 1516 | vmstat_update); |
| 1493 | 1517 | ||
| 1494 | if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL)) | 1518 | if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL)) |
