Diffstat (limited to 'mm')
| -rw-r--r-- | mm/filemap.c | 33 |
| -rw-r--r-- | mm/hugetlb.c | 3 |
| -rw-r--r-- | mm/hugetlb_cgroup.c | 5 |
| -rw-r--r-- | mm/ksm.c | 8 |
| -rw-r--r-- | mm/memcontrol.c | 41 |
| -rw-r--r-- | mm/memory-failure.c | 18 |
| -rw-r--r-- | mm/memory.c | 24 |
| -rw-r--r-- | mm/migrate.c | 5 |
| -rw-r--r-- | mm/page-writeback.c | 6 |
| -rw-r--r-- | mm/page_alloc.c | 31 |
| -rw-r--r-- | mm/percpu.c | 3 |
| -rw-r--r-- | mm/rmap.c | 10 |
| -rw-r--r-- | mm/shmem.c | 102 |
| -rw-r--r-- | mm/slab_common.c | 2 |
| -rw-r--r-- | mm/truncate.c | 11 |
15 files changed, 182 insertions, 120 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index dafb06f70a09..65d44fd88c78 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -241,18 +241,6 @@ void delete_from_page_cache(struct page *page) | |||
| 241 | } | 241 | } |
| 242 | EXPORT_SYMBOL(delete_from_page_cache); | 242 | EXPORT_SYMBOL(delete_from_page_cache); |
| 243 | 243 | ||
| 244 | static int sleep_on_page(void *word) | ||
| 245 | { | ||
| 246 | io_schedule(); | ||
| 247 | return 0; | ||
| 248 | } | ||
| 249 | |||
| 250 | static int sleep_on_page_killable(void *word) | ||
| 251 | { | ||
| 252 | sleep_on_page(word); | ||
| 253 | return fatal_signal_pending(current) ? -EINTR : 0; | ||
| 254 | } | ||
| 255 | |||
| 256 | static int filemap_check_errors(struct address_space *mapping) | 244 | static int filemap_check_errors(struct address_space *mapping) |
| 257 | { | 245 | { |
| 258 | int ret = 0; | 246 | int ret = 0; |
| @@ -692,7 +680,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) | |||
| 692 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); | 680 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); |
| 693 | 681 | ||
| 694 | if (test_bit(bit_nr, &page->flags)) | 682 | if (test_bit(bit_nr, &page->flags)) |
| 695 | __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page, | 683 | __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, |
| 696 | TASK_UNINTERRUPTIBLE); | 684 | TASK_UNINTERRUPTIBLE); |
| 697 | } | 685 | } |
| 698 | EXPORT_SYMBOL(wait_on_page_bit); | 686 | EXPORT_SYMBOL(wait_on_page_bit); |
| @@ -705,7 +693,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) | |||
| 705 | return 0; | 693 | return 0; |
| 706 | 694 | ||
| 707 | return __wait_on_bit(page_waitqueue(page), &wait, | 695 | return __wait_on_bit(page_waitqueue(page), &wait, |
| 708 | sleep_on_page_killable, TASK_KILLABLE); | 696 | bit_wait_io, TASK_KILLABLE); |
| 709 | } | 697 | } |
| 710 | 698 | ||
| 711 | /** | 699 | /** |
| @@ -806,7 +794,7 @@ void __lock_page(struct page *page) | |||
| 806 | { | 794 | { |
| 807 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 795 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
| 808 | 796 | ||
| 809 | __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, | 797 | __wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io, |
| 810 | TASK_UNINTERRUPTIBLE); | 798 | TASK_UNINTERRUPTIBLE); |
| 811 | } | 799 | } |
| 812 | EXPORT_SYMBOL(__lock_page); | 800 | EXPORT_SYMBOL(__lock_page); |
| @@ -816,7 +804,7 @@ int __lock_page_killable(struct page *page) | |||
| 816 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 804 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
| 817 | 805 | ||
| 818 | return __wait_on_bit_lock(page_waitqueue(page), &wait, | 806 | return __wait_on_bit_lock(page_waitqueue(page), &wait, |
| 819 | sleep_on_page_killable, TASK_KILLABLE); | 807 | bit_wait_io, TASK_KILLABLE); |
| 820 | } | 808 | } |
| 821 | EXPORT_SYMBOL_GPL(__lock_page_killable); | 809 | EXPORT_SYMBOL_GPL(__lock_page_killable); |
| 822 | 810 | ||
| @@ -1031,18 +1019,21 @@ EXPORT_SYMBOL(find_lock_entry); | |||
| 1031 | * @mapping: the address_space to search | 1019 | * @mapping: the address_space to search |
| 1032 | * @offset: the page index | 1020 | * @offset: the page index |
| 1033 | * @fgp_flags: PCG flags | 1021 | * @fgp_flags: PCG flags |
| 1034 | * @gfp_mask: gfp mask to use if a page is to be allocated | 1022 | * @cache_gfp_mask: gfp mask to use for the page cache data page allocation |
| 1023 | * @radix_gfp_mask: gfp mask to use for radix tree node allocation | ||
| 1035 | * | 1024 | * |
| 1036 | * Looks up the page cache slot at @mapping & @offset. | 1025 | * Looks up the page cache slot at @mapping & @offset. |
| 1037 | * | 1026 | * |
| 1038 | * PCG flags modify how the page is returned | 1027 | * PCG flags modify how the page is returned. |
| 1039 | * | 1028 | * |
| 1040 | * FGP_ACCESSED: the page will be marked accessed | 1029 | * FGP_ACCESSED: the page will be marked accessed |
| 1041 | * FGP_LOCK: Page is return locked | 1030 | * FGP_LOCK: Page is return locked |
| 1042 | * FGP_CREAT: If page is not present then a new page is allocated using | 1031 | * FGP_CREAT: If page is not present then a new page is allocated using |
| 1043 | * @gfp_mask and added to the page cache and the VM's LRU | 1032 | * @cache_gfp_mask and added to the page cache and the VM's LRU |
| 1044 | * list. The page is returned locked and with an increased | 1033 | * list. If radix tree nodes are allocated during page cache |
| 1045 | * refcount. Otherwise, %NULL is returned. | 1034 | * insertion then @radix_gfp_mask is used. The page is returned |
| 1035 | * locked and with an increased refcount. Otherwise, %NULL is | ||
| 1036 | * returned. | ||
| 1046 | * | 1037 | * |
| 1047 | * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even | 1038 | * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even |
| 1048 | * if the GFP flags specified for FGP_CREAT are atomic. | 1039 | * if the GFP flags specified for FGP_CREAT are atomic. |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2024bbd573d2..7a0a73d2fcff 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -856,7 +856,7 @@ struct hstate *size_to_hstate(unsigned long size) | |||
| 856 | return NULL; | 856 | return NULL; |
| 857 | } | 857 | } |
| 858 | 858 | ||
| 859 | static void free_huge_page(struct page *page) | 859 | void free_huge_page(struct page *page) |
| 860 | { | 860 | { |
| 861 | /* | 861 | /* |
| 862 | * Can't pass hstate in here because it is called from the | 862 | * Can't pass hstate in here because it is called from the |
| @@ -2604,6 +2604,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
| 2604 | } else { | 2604 | } else { |
| 2605 | if (cow) | 2605 | if (cow) |
| 2606 | huge_ptep_set_wrprotect(src, addr, src_pte); | 2606 | huge_ptep_set_wrprotect(src, addr, src_pte); |
| 2607 | entry = huge_ptep_get(src_pte); | ||
| 2607 | ptepage = pte_page(entry); | 2608 | ptepage = pte_page(entry); |
| 2608 | get_page(ptepage); | 2609 | get_page(ptepage); |
| 2609 | page_dup_rmap(ptepage); | 2610 | page_dup_rmap(ptepage); |
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 493f758445e7..9aae6f47433f 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c | |||
| @@ -358,9 +358,8 @@ static void __init __hugetlb_cgroup_file_init(int idx) | |||
| 358 | cft = &h->cgroup_files[4]; | 358 | cft = &h->cgroup_files[4]; |
| 359 | memset(cft, 0, sizeof(*cft)); | 359 | memset(cft, 0, sizeof(*cft)); |
| 360 | 360 | ||
| 361 | WARN_ON(cgroup_add_cftypes(&hugetlb_cgrp_subsys, h->cgroup_files)); | 361 | WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, |
| 362 | 362 | h->cgroup_files)); | |
| 363 | return; | ||
| 364 | } | 363 | } |
| 365 | 364 | ||
| 366 | void __init hugetlb_cgroup_file_init(void) | 365 | void __init hugetlb_cgroup_file_init(void) |
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
| @@ -1978,18 +1978,12 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) | |||
| 1978 | #endif /* CONFIG_MIGRATION */ | 1978 | #endif /* CONFIG_MIGRATION */ |
| 1979 | 1979 | ||
| 1980 | #ifdef CONFIG_MEMORY_HOTREMOVE | 1980 | #ifdef CONFIG_MEMORY_HOTREMOVE |
| 1981 | static int just_wait(void *word) | ||
| 1982 | { | ||
| 1983 | schedule(); | ||
| 1984 | return 0; | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | static void wait_while_offlining(void) | 1981 | static void wait_while_offlining(void) |
| 1988 | { | 1982 | { |
| 1989 | while (ksm_run & KSM_RUN_OFFLINE) { | 1983 | while (ksm_run & KSM_RUN_OFFLINE) { |
| 1990 | mutex_unlock(&ksm_thread_mutex); | 1984 | mutex_unlock(&ksm_thread_mutex); |
| 1991 | wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), | 1985 | wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), |
| 1992 | just_wait, TASK_UNINTERRUPTIBLE); | 1986 | TASK_UNINTERRUPTIBLE); |
| 1993 | mutex_lock(&ksm_thread_mutex); | 1987 | mutex_lock(&ksm_thread_mutex); |
| 1994 | } | 1988 | } |
| 1995 | } | 1989 | } |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2c7bcb0e6eb..f009a14918d2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -5415,8 +5415,12 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) | |||
| 5415 | { | 5415 | { |
| 5416 | struct mem_cgroup_eventfd_list *ev; | 5416 | struct mem_cgroup_eventfd_list *ev; |
| 5417 | 5417 | ||
| 5418 | spin_lock(&memcg_oom_lock); | ||
| 5419 | |||
| 5418 | list_for_each_entry(ev, &memcg->oom_notify, list) | 5420 | list_for_each_entry(ev, &memcg->oom_notify, list) |
| 5419 | eventfd_signal(ev->eventfd, 1); | 5421 | eventfd_signal(ev->eventfd, 1); |
| 5422 | |||
| 5423 | spin_unlock(&memcg_oom_lock); | ||
| 5420 | return 0; | 5424 | return 0; |
| 5421 | } | 5425 | } |
| 5422 | 5426 | ||
| @@ -6003,7 +6007,6 @@ static struct cftype mem_cgroup_files[] = { | |||
| 6003 | }, | 6007 | }, |
| 6004 | { | 6008 | { |
| 6005 | .name = "use_hierarchy", | 6009 | .name = "use_hierarchy", |
| 6006 | .flags = CFTYPE_INSANE, | ||
| 6007 | .write_u64 = mem_cgroup_hierarchy_write, | 6010 | .write_u64 = mem_cgroup_hierarchy_write, |
| 6008 | .read_u64 = mem_cgroup_hierarchy_read, | 6011 | .read_u64 = mem_cgroup_hierarchy_read, |
| 6009 | }, | 6012 | }, |
| @@ -6407,6 +6410,29 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) | |||
| 6407 | __mem_cgroup_free(memcg); | 6410 | __mem_cgroup_free(memcg); |
| 6408 | } | 6411 | } |
| 6409 | 6412 | ||
| 6413 | /** | ||
| 6414 | * mem_cgroup_css_reset - reset the states of a mem_cgroup | ||
| 6415 | * @css: the target css | ||
| 6416 | * | ||
| 6417 | * Reset the states of the mem_cgroup associated with @css. This is | ||
| 6418 | * invoked when the userland requests disabling on the default hierarchy | ||
| 6419 | * but the memcg is pinned through dependency. The memcg should stop | ||
| 6420 | * applying policies and should revert to the vanilla state as it may be | ||
| 6421 | * made visible again. | ||
| 6422 | * | ||
| 6423 | * The current implementation only resets the essential configurations. | ||
| 6424 | * This needs to be expanded to cover all the visible parts. | ||
| 6425 | */ | ||
| 6426 | static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) | ||
| 6427 | { | ||
| 6428 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
| 6429 | |||
| 6430 | mem_cgroup_resize_limit(memcg, ULLONG_MAX); | ||
| 6431 | mem_cgroup_resize_memsw_limit(memcg, ULLONG_MAX); | ||
| 6432 | memcg_update_kmem_limit(memcg, ULLONG_MAX); | ||
| 6433 | res_counter_set_soft_limit(&memcg->res, ULLONG_MAX); | ||
| 6434 | } | ||
| 6435 | |||
| 6410 | #ifdef CONFIG_MMU | 6436 | #ifdef CONFIG_MMU |
| 6411 | /* Handlers for move charge at task migration. */ | 6437 | /* Handlers for move charge at task migration. */ |
| 6412 | #define PRECHARGE_COUNT_AT_ONCE 256 | 6438 | #define PRECHARGE_COUNT_AT_ONCE 256 |
| @@ -7001,16 +7027,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css, | |||
| 7001 | 7027 | ||
| 7002 | /* | 7028 | /* |
| 7003 | * Cgroup retains root cgroups across [un]mount cycles making it necessary | 7029 | * Cgroup retains root cgroups across [un]mount cycles making it necessary |
| 7004 | * to verify sane_behavior flag on each mount attempt. | 7030 | * to verify whether we're attached to the default hierarchy on each mount |
| 7031 | * attempt. | ||
| 7005 | */ | 7032 | */ |
| 7006 | static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) | 7033 | static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) |
| 7007 | { | 7034 | { |
| 7008 | /* | 7035 | /* |
| 7009 | * use_hierarchy is forced with sane_behavior. cgroup core | 7036 | * use_hierarchy is forced on the default hierarchy. cgroup core |
| 7010 | * guarantees that @root doesn't have any children, so turning it | 7037 | * guarantees that @root doesn't have any children, so turning it |
| 7011 | * on for the root memcg is enough. | 7038 | * on for the root memcg is enough. |
| 7012 | */ | 7039 | */ |
| 7013 | if (cgroup_sane_behavior(root_css->cgroup)) | 7040 | if (cgroup_on_dfl(root_css->cgroup)) |
| 7014 | mem_cgroup_from_css(root_css)->use_hierarchy = true; | 7041 | mem_cgroup_from_css(root_css)->use_hierarchy = true; |
| 7015 | } | 7042 | } |
| 7016 | 7043 | ||
| @@ -7019,11 +7046,12 @@ struct cgroup_subsys memory_cgrp_subsys = { | |||
| 7019 | .css_online = mem_cgroup_css_online, | 7046 | .css_online = mem_cgroup_css_online, |
| 7020 | .css_offline = mem_cgroup_css_offline, | 7047 | .css_offline = mem_cgroup_css_offline, |
| 7021 | .css_free = mem_cgroup_css_free, | 7048 | .css_free = mem_cgroup_css_free, |
| 7049 | .css_reset = mem_cgroup_css_reset, | ||
| 7022 | .can_attach = mem_cgroup_can_attach, | 7050 | .can_attach = mem_cgroup_can_attach, |
| 7023 | .cancel_attach = mem_cgroup_cancel_attach, | 7051 | .cancel_attach = mem_cgroup_cancel_attach, |
| 7024 | .attach = mem_cgroup_move_task, | 7052 | .attach = mem_cgroup_move_task, |
| 7025 | .bind = mem_cgroup_bind, | 7053 | .bind = mem_cgroup_bind, |
| 7026 | .base_cftypes = mem_cgroup_files, | 7054 | .legacy_cftypes = mem_cgroup_files, |
| 7027 | .early_init = 0, | 7055 | .early_init = 0, |
| 7028 | }; | 7056 | }; |
| 7029 | 7057 | ||
| @@ -7040,7 +7068,8 @@ __setup("swapaccount=", enable_swap_account); | |||
| 7040 | 7068 | ||
| 7041 | static void __init memsw_file_init(void) | 7069 | static void __init memsw_file_init(void) |
| 7042 | { | 7070 | { |
| 7043 | WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, memsw_cgroup_files)); | 7071 | WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, |
| 7072 | memsw_cgroup_files)); | ||
| 7044 | } | 7073 | } |
| 7045 | 7074 | ||
| 7046 | static void __init enable_swap_cgroup(void) | 7075 | static void __init enable_swap_cgroup(void) |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c6399e328931..a013bc94ebbe 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -435,7 +435,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, | |||
| 435 | if (av == NULL) /* Not actually mapped anymore */ | 435 | if (av == NULL) /* Not actually mapped anymore */ |
| 436 | return; | 436 | return; |
| 437 | 437 | ||
| 438 | pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 438 | pgoff = page_to_pgoff(page); |
| 439 | read_lock(&tasklist_lock); | 439 | read_lock(&tasklist_lock); |
| 440 | for_each_process (tsk) { | 440 | for_each_process (tsk) { |
| 441 | struct anon_vma_chain *vmac; | 441 | struct anon_vma_chain *vmac; |
| @@ -469,7 +469,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, | |||
| 469 | mutex_lock(&mapping->i_mmap_mutex); | 469 | mutex_lock(&mapping->i_mmap_mutex); |
| 470 | read_lock(&tasklist_lock); | 470 | read_lock(&tasklist_lock); |
| 471 | for_each_process(tsk) { | 471 | for_each_process(tsk) { |
| 472 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 472 | pgoff_t pgoff = page_to_pgoff(page); |
| 473 | struct task_struct *t = task_early_kill(tsk, force_early); | 473 | struct task_struct *t = task_early_kill(tsk, force_early); |
| 474 | 474 | ||
| 475 | if (!t) | 475 | if (!t) |
| @@ -895,7 +895,13 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
| 895 | struct page *hpage = *hpagep; | 895 | struct page *hpage = *hpagep; |
| 896 | struct page *ppage; | 896 | struct page *ppage; |
| 897 | 897 | ||
| 898 | if (PageReserved(p) || PageSlab(p) || !PageLRU(p)) | 898 | /* |
| 899 | * Here we are interested only in user-mapped pages, so skip any | ||
| 900 | * other types of pages. | ||
| 901 | */ | ||
| 902 | if (PageReserved(p) || PageSlab(p)) | ||
| 903 | return SWAP_SUCCESS; | ||
| 904 | if (!(PageLRU(hpage) || PageHuge(p))) | ||
| 899 | return SWAP_SUCCESS; | 905 | return SWAP_SUCCESS; |
| 900 | 906 | ||
| 901 | /* | 907 | /* |
| @@ -905,8 +911,10 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
| 905 | if (!page_mapped(hpage)) | 911 | if (!page_mapped(hpage)) |
| 906 | return SWAP_SUCCESS; | 912 | return SWAP_SUCCESS; |
| 907 | 913 | ||
| 908 | if (PageKsm(p)) | 914 | if (PageKsm(p)) { |
| 915 | pr_err("MCE %#lx: can't handle KSM pages.\n", pfn); | ||
| 909 | return SWAP_FAIL; | 916 | return SWAP_FAIL; |
| 917 | } | ||
| 910 | 918 | ||
| 911 | if (PageSwapCache(p)) { | 919 | if (PageSwapCache(p)) { |
| 912 | printk(KERN_ERR | 920 | printk(KERN_ERR |
| @@ -1229,7 +1237,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
| 1229 | */ | 1237 | */ |
| 1230 | if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) | 1238 | if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) |
| 1231 | != SWAP_SUCCESS) { | 1239 | != SWAP_SUCCESS) { |
| 1232 | printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); | 1240 | action_result(pfn, "unmapping failed", IGNORED); |
| 1233 | res = -EBUSY; | 1241 | res = -EBUSY; |
| 1234 | goto out; | 1242 | goto out; |
| 1235 | } | 1243 | } |
diff --git a/mm/memory.c b/mm/memory.c index d67fd9fcf1f2..8b44f765b645 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -2758,23 +2758,18 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, | |||
| 2758 | update_mmu_cache(vma, address, pte); | 2758 | update_mmu_cache(vma, address, pte); |
| 2759 | } | 2759 | } |
| 2760 | 2760 | ||
| 2761 | static unsigned long fault_around_bytes = 65536; | 2761 | static unsigned long fault_around_bytes = rounddown_pow_of_two(65536); |
| 2762 | 2762 | ||
| 2763 | /* | ||
| 2764 | * fault_around_pages() and fault_around_mask() round down fault_around_bytes | ||
| 2765 | * to nearest page order. It's what do_fault_around() expects to see. | ||
| 2766 | */ | ||
| 2767 | static inline unsigned long fault_around_pages(void) | 2763 | static inline unsigned long fault_around_pages(void) |
| 2768 | { | 2764 | { |
| 2769 | return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; | 2765 | return fault_around_bytes >> PAGE_SHIFT; |
| 2770 | } | 2766 | } |
| 2771 | 2767 | ||
| 2772 | static inline unsigned long fault_around_mask(void) | 2768 | static inline unsigned long fault_around_mask(void) |
| 2773 | { | 2769 | { |
| 2774 | return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; | 2770 | return ~(fault_around_bytes - 1) & PAGE_MASK; |
| 2775 | } | 2771 | } |
| 2776 | 2772 | ||
| 2777 | |||
| 2778 | #ifdef CONFIG_DEBUG_FS | 2773 | #ifdef CONFIG_DEBUG_FS |
| 2779 | static int fault_around_bytes_get(void *data, u64 *val) | 2774 | static int fault_around_bytes_get(void *data, u64 *val) |
| 2780 | { | 2775 | { |
| @@ -2782,11 +2777,19 @@ static int fault_around_bytes_get(void *data, u64 *val) | |||
| 2782 | return 0; | 2777 | return 0; |
| 2783 | } | 2778 | } |
| 2784 | 2779 | ||
| 2780 | /* | ||
| 2781 | * fault_around_pages() and fault_around_mask() expects fault_around_bytes | ||
| 2782 | * rounded down to nearest page order. It's what do_fault_around() expects to | ||
| 2783 | * see. | ||
| 2784 | */ | ||
| 2785 | static int fault_around_bytes_set(void *data, u64 val) | 2785 | static int fault_around_bytes_set(void *data, u64 val) |
| 2786 | { | 2786 | { |
| 2787 | if (val / PAGE_SIZE > PTRS_PER_PTE) | 2787 | if (val / PAGE_SIZE > PTRS_PER_PTE) |
| 2788 | return -EINVAL; | 2788 | return -EINVAL; |
| 2789 | fault_around_bytes = val; | 2789 | if (val > PAGE_SIZE) |
| 2790 | fault_around_bytes = rounddown_pow_of_two(val); | ||
| 2791 | else | ||
| 2792 | fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */ | ||
| 2790 | return 0; | 2793 | return 0; |
| 2791 | } | 2794 | } |
| 2792 | DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, | 2795 | DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, |
| @@ -2882,7 +2885,8 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 2882 | * if page by the offset is not ready to be mapped (cold cache or | 2885 | * if page by the offset is not ready to be mapped (cold cache or |
| 2883 | * something). | 2886 | * something). |
| 2884 | */ | 2887 | */ |
| 2885 | if (vma->vm_ops->map_pages && fault_around_pages() > 1) { | 2888 | if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && |
| 2889 | fault_around_pages() > 1) { | ||
| 2886 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 2890 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
| 2887 | do_fault_around(vma, address, pte, pgoff, flags); | 2891 | do_fault_around(vma, address, pte, pgoff, flags); |
| 2888 | if (!pte_same(*pte, orig_pte)) | 2892 | if (!pte_same(*pte, orig_pte)) |
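
The memory.c hunks above move the power-of-two rounding of fault_around_bytes from the readers into the debugfs write path, so fault_around_pages() and fault_around_mask() reduce to a plain shift and mask. The standalone C model below mirrors that logic outside the kernel; the 4K page size, the local rounddown_pow_of_two() and main() are illustrative assumptions, not kernel code.

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT   12
#define PAGE_SIZE    (1UL << PAGE_SHIFT)
#define PAGE_MASK    (~(PAGE_SIZE - 1))
#define PTRS_PER_PTE 512

static unsigned long fault_around_bytes = 65536;

/* Round v down to the nearest power of two (returns 0 for 0 here; the
 * kernel's rounddown_pow_of_two(0) is undefined, hence the PAGE_SIZE floor
 * in the setter below). */
static unsigned long rounddown_pow_of_two(unsigned long v)
{
	while (v & (v - 1))
		v &= v - 1;		/* clear the lowest set bit */
	return v;
}

/* Mirrors fault_around_bytes_set(): round at write time, clamp the range. */
static int fault_around_bytes_set(uint64_t val)
{
	if (val / PAGE_SIZE > PTRS_PER_PTE)
		return -1;		/* -EINVAL in the kernel */
	if (val > PAGE_SIZE)
		fault_around_bytes = rounddown_pow_of_two(val);
	else
		fault_around_bytes = PAGE_SIZE;
	return 0;
}

/* Readers may now assume a power of two and use shift/mask directly. */
static unsigned long fault_around_pages(void)
{
	return fault_around_bytes >> PAGE_SHIFT;
}

static unsigned long fault_around_mask(void)
{
	return ~(fault_around_bytes - 1) & PAGE_MASK;
}

int main(void)
{
	fault_around_bytes_set(70000);	/* rounds down to 65536 -> 16 pages */
	printf("pages=%lu mask=%#lx\n", fault_around_pages(), fault_around_mask());
	return 0;
}
```

The setter never stores a zero or non-power-of-two value, which is why the readers no longer need to re-round on every fault.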
diff --git a/mm/migrate.c b/mm/migrate.c index 9e0beaa91845..be6dbf995c0c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -988,9 +988,10 @@ out: | |||
| 988 | * it. Otherwise, putback_lru_page() will drop the reference grabbed | 988 | * it. Otherwise, putback_lru_page() will drop the reference grabbed |
| 989 | * during isolation. | 989 | * during isolation. |
| 990 | */ | 990 | */ |
| 991 | if (rc != MIGRATEPAGE_SUCCESS && put_new_page) | 991 | if (rc != MIGRATEPAGE_SUCCESS && put_new_page) { |
| 992 | ClearPageSwapBacked(newpage); | ||
| 992 | put_new_page(newpage, private); | 993 | put_new_page(newpage, private); |
| 993 | else | 994 | } else |
| 994 | putback_lru_page(newpage); | 995 | putback_lru_page(newpage); |
| 995 | 996 | ||
| 996 | if (result) { | 997 | if (result) { |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 518e2c3f4c75..e0c943014eb7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -1306,9 +1306,9 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi, | |||
| 1306 | *bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); | 1306 | *bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); |
| 1307 | 1307 | ||
| 1308 | if (bdi_bg_thresh) | 1308 | if (bdi_bg_thresh) |
| 1309 | *bdi_bg_thresh = div_u64((u64)*bdi_thresh * | 1309 | *bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh * |
| 1310 | background_thresh, | 1310 | background_thresh, |
| 1311 | dirty_thresh); | 1311 | dirty_thresh) : 0; |
| 1312 | 1312 | ||
| 1313 | /* | 1313 | /* |
| 1314 | * In order to avoid the stacked BDI deadlock we need | 1314 | * In order to avoid the stacked BDI deadlock we need |
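
The page-writeback.c change above is a divide-by-zero guard: when dirty_thresh is 0, the background scaling is skipped and the per-BDI background threshold is simply 0. A minimal userspace restatement of the guarded computation follows; plain 64-bit division stands in for div_u64, and the function name and sample values are made up for illustration.

```c
#include <stdio.h>
#include <stdint.h>

/* bdi_bg_thresh = bdi_thresh * background_thresh / dirty_thresh,
 * except that a zero dirty_thresh now yields 0 instead of a trap. */
static uint64_t bdi_bg_thresh(uint64_t bdi_thresh, uint64_t background_thresh,
			      uint64_t dirty_thresh)
{
	return dirty_thresh ? (bdi_thresh * background_thresh) / dirty_thresh : 0;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)bdi_bg_thresh(1200, 100, 200)); /* 600 */
	printf("%llu\n", (unsigned long long)bdi_bg_thresh(1200, 100, 0));   /* 0, no divide by zero */
	return 0;
}
```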
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0ea758b898fd..ef44ad736ca1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -2447,7 +2447,7 @@ static inline int | |||
| 2447 | gfp_to_alloc_flags(gfp_t gfp_mask) | 2447 | gfp_to_alloc_flags(gfp_t gfp_mask) |
| 2448 | { | 2448 | { |
| 2449 | int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; | 2449 | int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; |
| 2450 | const gfp_t wait = gfp_mask & __GFP_WAIT; | 2450 | const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD)); |
| 2451 | 2451 | ||
| 2452 | /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ | 2452 | /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ |
| 2453 | BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); | 2453 | BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); |
| @@ -2456,20 +2456,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask) | |||
| 2456 | * The caller may dip into page reserves a bit more if the caller | 2456 | * The caller may dip into page reserves a bit more if the caller |
| 2457 | * cannot run direct reclaim, or if the caller has realtime scheduling | 2457 | * cannot run direct reclaim, or if the caller has realtime scheduling |
| 2458 | * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will | 2458 | * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will |
| 2459 | * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH). | 2459 | * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH). |
| 2460 | */ | 2460 | */ |
| 2461 | alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); | 2461 | alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); |
| 2462 | 2462 | ||
| 2463 | if (!wait) { | 2463 | if (atomic) { |
| 2464 | /* | 2464 | /* |
| 2465 | * Not worth trying to allocate harder for | 2465 | * Not worth trying to allocate harder for __GFP_NOMEMALLOC even |
| 2466 | * __GFP_NOMEMALLOC even if it can't schedule. | 2466 | * if it can't schedule. |
| 2467 | */ | 2467 | */ |
| 2468 | if (!(gfp_mask & __GFP_NOMEMALLOC)) | 2468 | if (!(gfp_mask & __GFP_NOMEMALLOC)) |
| 2469 | alloc_flags |= ALLOC_HARDER; | 2469 | alloc_flags |= ALLOC_HARDER; |
| 2470 | /* | 2470 | /* |
| 2471 | * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. | 2471 | * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the |
| 2472 | * See also cpuset_zone_allowed() comment in kernel/cpuset.c. | 2472 | * comment for __cpuset_node_allowed_softwall(). |
| 2473 | */ | 2473 | */ |
| 2474 | alloc_flags &= ~ALLOC_CPUSET; | 2474 | alloc_flags &= ~ALLOC_CPUSET; |
| 2475 | } else if (unlikely(rt_task(current)) && !in_interrupt()) | 2475 | } else if (unlikely(rt_task(current)) && !in_interrupt()) |
| @@ -6062,11 +6062,13 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) | |||
| 6062 | } | 6062 | } |
| 6063 | 6063 | ||
| 6064 | /** | 6064 | /** |
| 6065 | * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages | 6065 | * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages |
| 6066 | * @page: The page within the block of interest | 6066 | * @page: The page within the block of interest |
| 6067 | * @start_bitidx: The first bit of interest to retrieve | 6067 | * @pfn: The target page frame number |
| 6068 | * @end_bitidx: The last bit of interest | 6068 | * @end_bitidx: The last bit of interest to retrieve |
| 6069 | * returns pageblock_bits flags | 6069 | * @mask: mask of bits that the caller is interested in |
| 6070 | * | ||
| 6071 | * Return: pageblock_bits flags | ||
| 6070 | */ | 6072 | */ |
| 6071 | unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, | 6073 | unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, |
| 6072 | unsigned long end_bitidx, | 6074 | unsigned long end_bitidx, |
| @@ -6091,9 +6093,10 @@ unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, | |||
| 6091 | /** | 6093 | /** |
| 6092 | * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages | 6094 | * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages |
| 6093 | * @page: The page within the block of interest | 6095 | * @page: The page within the block of interest |
| 6094 | * @start_bitidx: The first bit of interest | ||
| 6095 | * @end_bitidx: The last bit of interest | ||
| 6096 | * @flags: The flags to set | 6096 | * @flags: The flags to set |
| 6097 | * @pfn: The target page frame number | ||
| 6098 | * @end_bitidx: The last bit of interest | ||
| 6099 | * @mask: mask of bits that the caller is interested in | ||
| 6097 | */ | 6100 | */ |
| 6098 | void set_pfnblock_flags_mask(struct page *page, unsigned long flags, | 6101 | void set_pfnblock_flags_mask(struct page *page, unsigned long flags, |
| 6099 | unsigned long pfn, | 6102 | unsigned long pfn, |
diff --git a/mm/percpu.c b/mm/percpu.c index 2ddf9a990dbd..2139e30a4b44 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
| @@ -720,8 +720,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
| 720 | if (unlikely(align < 2)) | 720 | if (unlikely(align < 2)) |
| 721 | align = 2; | 721 | align = 2; |
| 722 | 722 | ||
| 723 | if (unlikely(size & 1)) | 723 | size = ALIGN(size, 2); |
| 724 | size++; | ||
| 725 | 724 | ||
| 726 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 725 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
| 727 | WARN(true, "illegal size (%zu) or align (%zu) for " | 726 | WARN(true, "illegal size (%zu) or align (%zu) for " |
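
The percpu.c hunk replaces the hand-rolled "bump odd sizes by one" with ALIGN(size, 2), the kernel's generic round-up macro for power-of-two alignments. A userspace restatement of that arithmetic, with illustrative demo values:

```c
#include <stdio.h>

/* Same arithmetic as the kernel's ALIGN(): round x up to a multiple of a,
 * where a is a power of two. ALIGN(size, 2) leaves even sizes alone and
 * bumps odd sizes by one, exactly what the removed if/size++ did. */
#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))

int main(void)
{
	printf("%lu %lu %lu\n", ALIGN(7UL, 2UL), ALIGN(8UL, 2UL), ALIGN(0UL, 2UL)); /* 8 8 0 */
	return 0;
}
```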
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
| @@ -517,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma) | |||
| 517 | static inline unsigned long | 517 | static inline unsigned long |
| 518 | __vma_address(struct page *page, struct vm_area_struct *vma) | 518 | __vma_address(struct page *page, struct vm_area_struct *vma) |
| 519 | { | 519 | { |
| 520 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 520 | pgoff_t pgoff = page_to_pgoff(page); |
| 521 | |||
| 522 | if (unlikely(is_vm_hugetlb_page(vma))) | ||
| 523 | pgoff = page->index << huge_page_order(page_hstate(page)); | ||
| 524 | |||
| 525 | return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); | 521 | return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); |
| 526 | } | 522 | } |
| 527 | 523 | ||
| @@ -1639,7 +1635,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, | |||
| 1639 | static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) | 1635 | static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) |
| 1640 | { | 1636 | { |
| 1641 | struct anon_vma *anon_vma; | 1637 | struct anon_vma *anon_vma; |
| 1642 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1638 | pgoff_t pgoff = page_to_pgoff(page); |
| 1643 | struct anon_vma_chain *avc; | 1639 | struct anon_vma_chain *avc; |
| 1644 | int ret = SWAP_AGAIN; | 1640 | int ret = SWAP_AGAIN; |
| 1645 | 1641 | ||
| @@ -1680,7 +1676,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) | |||
| 1680 | static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) | 1676 | static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) |
| 1681 | { | 1677 | { |
| 1682 | struct address_space *mapping = page->mapping; | 1678 | struct address_space *mapping = page->mapping; |
| 1683 | pgoff_t pgoff = page->index << compound_order(page); | 1679 | pgoff_t pgoff = page_to_pgoff(page); |
| 1684 | struct vm_area_struct *vma; | 1680 | struct vm_area_struct *vma; |
| 1685 | int ret = SWAP_AGAIN; | 1681 | int ret = SWAP_AGAIN; |
| 1686 | 1682 | ||
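
The rmap.c and memory-failure.c hunks above replace three open-coded pgoff computations with a single page_to_pgoff() helper whose definition is not part of this diff. Judging from the formulas being removed (a plain index shift for regular pages, a huge-page/compound-order shift for hugetlb), it presumably behaves like the userspace model below; struct page_model, its field names and main() are invented for illustration.

```c
#include <stdio.h>

#define PAGE_SHIFT       12
#define PAGE_CACHE_SHIFT 12   /* equal to PAGE_SHIFT in this era's kernels */

/* Just enough of struct page for the model. */
struct page_model {
	unsigned long index;          /* page->index */
	int           is_huge;        /* PageHuge(page) */
	unsigned int  compound_order; /* huge_page_order() of a hugetlb page */
};

/* Hugetlb pages index their mapping in huge-page-sized units, so the offset
 * is scaled by the compound order; everything else uses the (normally zero)
 * page-cache shift difference. */
static unsigned long page_to_pgoff(const struct page_model *page)
{
	if (page->is_huge)
		return page->index << page->compound_order;
	return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
}

int main(void)
{
	struct page_model file_page = { .index = 100, .is_huge = 0 };
	struct page_model huge_page = { .index = 3,   .is_huge = 1, .compound_order = 9 };

	printf("%lu %lu\n", page_to_pgoff(&file_page), page_to_pgoff(&huge_page));
	return 0;
}
```

Folding the hugetlb case into one helper is what lets __vma_address() and the memory-failure walkers drop their per-call-site special cases.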
diff --git a/mm/shmem.c b/mm/shmem.c index 1140f49b6ded..af68b15a8fc1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -85,7 +85,7 @@ static struct vfsmount *shm_mnt; | |||
| 85 | * a time): we would prefer not to enlarge the shmem inode just for that. | 85 | * a time): we would prefer not to enlarge the shmem inode just for that. |
| 86 | */ | 86 | */ |
| 87 | struct shmem_falloc { | 87 | struct shmem_falloc { |
| 88 | int mode; /* FALLOC_FL mode currently operating */ | 88 | wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ |
| 89 | pgoff_t start; /* start of range currently being fallocated */ | 89 | pgoff_t start; /* start of range currently being fallocated */ |
| 90 | pgoff_t next; /* the next page offset to be fallocated */ | 90 | pgoff_t next; /* the next page offset to be fallocated */ |
| 91 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ | 91 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ |
| @@ -468,23 +468,20 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
| 468 | return; | 468 | return; |
| 469 | 469 | ||
| 470 | index = start; | 470 | index = start; |
| 471 | for ( ; ; ) { | 471 | while (index < end) { |
| 472 | cond_resched(); | 472 | cond_resched(); |
| 473 | 473 | ||
| 474 | pvec.nr = find_get_entries(mapping, index, | 474 | pvec.nr = find_get_entries(mapping, index, |
| 475 | min(end - index, (pgoff_t)PAGEVEC_SIZE), | 475 | min(end - index, (pgoff_t)PAGEVEC_SIZE), |
| 476 | pvec.pages, indices); | 476 | pvec.pages, indices); |
| 477 | if (!pvec.nr) { | 477 | if (!pvec.nr) { |
| 478 | if (index == start || unfalloc) | 478 | /* If all gone or hole-punch or unfalloc, we're done */ |
| 479 | if (index == start || end != -1) | ||
| 479 | break; | 480 | break; |
| 481 | /* But if truncating, restart to make sure all gone */ | ||
| 480 | index = start; | 482 | index = start; |
| 481 | continue; | 483 | continue; |
| 482 | } | 484 | } |
| 483 | if ((index == start || unfalloc) && indices[0] >= end) { | ||
| 484 | pagevec_remove_exceptionals(&pvec); | ||
| 485 | pagevec_release(&pvec); | ||
| 486 | break; | ||
| 487 | } | ||
| 488 | mem_cgroup_uncharge_start(); | 485 | mem_cgroup_uncharge_start(); |
| 489 | for (i = 0; i < pagevec_count(&pvec); i++) { | 486 | for (i = 0; i < pagevec_count(&pvec); i++) { |
| 490 | struct page *page = pvec.pages[i]; | 487 | struct page *page = pvec.pages[i]; |
| @@ -496,8 +493,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
| 496 | if (radix_tree_exceptional_entry(page)) { | 493 | if (radix_tree_exceptional_entry(page)) { |
| 497 | if (unfalloc) | 494 | if (unfalloc) |
| 498 | continue; | 495 | continue; |
| 499 | nr_swaps_freed += !shmem_free_swap(mapping, | 496 | if (shmem_free_swap(mapping, index, page)) { |
| 500 | index, page); | 497 | /* Swap was replaced by page: retry */ |
| 498 | index--; | ||
| 499 | break; | ||
| 500 | } | ||
| 501 | nr_swaps_freed++; | ||
| 501 | continue; | 502 | continue; |
| 502 | } | 503 | } |
| 503 | 504 | ||
| @@ -506,6 +507,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
| 506 | if (page->mapping == mapping) { | 507 | if (page->mapping == mapping) { |
| 507 | VM_BUG_ON_PAGE(PageWriteback(page), page); | 508 | VM_BUG_ON_PAGE(PageWriteback(page), page); |
| 508 | truncate_inode_page(mapping, page); | 509 | truncate_inode_page(mapping, page); |
| 510 | } else { | ||
| 511 | /* Page was replaced by swap: retry */ | ||
| 512 | unlock_page(page); | ||
| 513 | index--; | ||
| 514 | break; | ||
| 509 | } | 515 | } |
| 510 | } | 516 | } |
| 511 | unlock_page(page); | 517 | unlock_page(page); |
| @@ -760,7 +766,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
| 760 | spin_lock(&inode->i_lock); | 766 | spin_lock(&inode->i_lock); |
| 761 | shmem_falloc = inode->i_private; | 767 | shmem_falloc = inode->i_private; |
| 762 | if (shmem_falloc && | 768 | if (shmem_falloc && |
| 763 | !shmem_falloc->mode && | 769 | !shmem_falloc->waitq && |
| 764 | index >= shmem_falloc->start && | 770 | index >= shmem_falloc->start && |
| 765 | index < shmem_falloc->next) | 771 | index < shmem_falloc->next) |
| 766 | shmem_falloc->nr_unswapped++; | 772 | shmem_falloc->nr_unswapped++; |
| @@ -1248,38 +1254,58 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 1248 | * Trinity finds that probing a hole which tmpfs is punching can | 1254 | * Trinity finds that probing a hole which tmpfs is punching can |
| 1249 | * prevent the hole-punch from ever completing: which in turn | 1255 | * prevent the hole-punch from ever completing: which in turn |
| 1250 | * locks writers out with its hold on i_mutex. So refrain from | 1256 | * locks writers out with its hold on i_mutex. So refrain from |
| 1251 | * faulting pages into the hole while it's being punched, and | 1257 | * faulting pages into the hole while it's being punched. Although |
| 1252 | * wait on i_mutex to be released if vmf->flags permits. | 1258 | * shmem_undo_range() does remove the additions, it may be unable to |
| 1259 | * keep up, as each new page needs its own unmap_mapping_range() call, | ||
| 1260 | * and the i_mmap tree grows ever slower to scan if new vmas are added. | ||
| 1261 | * | ||
| 1262 | * It does not matter if we sometimes reach this check just before the | ||
| 1263 | * hole-punch begins, so that one fault then races with the punch: | ||
| 1264 | * we just need to make racing faults a rare case. | ||
| 1265 | * | ||
| 1266 | * The implementation below would be much simpler if we just used a | ||
| 1267 | * standard mutex or completion: but we cannot take i_mutex in fault, | ||
| 1268 | * and bloating every shmem inode for this unlikely case would be sad. | ||
| 1253 | */ | 1269 | */ |
| 1254 | if (unlikely(inode->i_private)) { | 1270 | if (unlikely(inode->i_private)) { |
| 1255 | struct shmem_falloc *shmem_falloc; | 1271 | struct shmem_falloc *shmem_falloc; |
| 1256 | 1272 | ||
| 1257 | spin_lock(&inode->i_lock); | 1273 | spin_lock(&inode->i_lock); |
| 1258 | shmem_falloc = inode->i_private; | 1274 | shmem_falloc = inode->i_private; |
| 1259 | if (!shmem_falloc || | 1275 | if (shmem_falloc && |
| 1260 | shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE || | 1276 | shmem_falloc->waitq && |
| 1261 | vmf->pgoff < shmem_falloc->start || | 1277 | vmf->pgoff >= shmem_falloc->start && |
| 1262 | vmf->pgoff >= shmem_falloc->next) | 1278 | vmf->pgoff < shmem_falloc->next) { |
| 1263 | shmem_falloc = NULL; | 1279 | wait_queue_head_t *shmem_falloc_waitq; |
| 1264 | spin_unlock(&inode->i_lock); | 1280 | DEFINE_WAIT(shmem_fault_wait); |
| 1265 | /* | 1281 | |
| 1266 | * i_lock has protected us from taking shmem_falloc seriously | 1282 | ret = VM_FAULT_NOPAGE; |
| 1267 | * once return from shmem_fallocate() went back up that stack. | ||
| 1268 | * i_lock does not serialize with i_mutex at all, but it does | ||
| 1269 | * not matter if sometimes we wait unnecessarily, or sometimes | ||
| 1270 | * miss out on waiting: we just need to make those cases rare. | ||
| 1271 | */ | ||
| 1272 | if (shmem_falloc) { | ||
| 1273 | if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && | 1283 | if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && |
| 1274 | !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { | 1284 | !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { |
| 1285 | /* It's polite to up mmap_sem if we can */ | ||
| 1275 | up_read(&vma->vm_mm->mmap_sem); | 1286 | up_read(&vma->vm_mm->mmap_sem); |
| 1276 | mutex_lock(&inode->i_mutex); | 1287 | ret = VM_FAULT_RETRY; |
| 1277 | mutex_unlock(&inode->i_mutex); | ||
| 1278 | return VM_FAULT_RETRY; | ||
| 1279 | } | 1288 | } |
| 1280 | /* cond_resched? Leave that to GUP or return to user */ | 1289 | |
| 1281 | return VM_FAULT_NOPAGE; | 1290 | shmem_falloc_waitq = shmem_falloc->waitq; |
| 1291 | prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, | ||
| 1292 | TASK_UNINTERRUPTIBLE); | ||
| 1293 | spin_unlock(&inode->i_lock); | ||
| 1294 | schedule(); | ||
| 1295 | |||
| 1296 | /* | ||
| 1297 | * shmem_falloc_waitq points into the shmem_fallocate() | ||
| 1298 | * stack of the hole-punching task: shmem_falloc_waitq | ||
| 1299 | * is usually invalid by the time we reach here, but | ||
| 1300 | * finish_wait() does not dereference it in that case; | ||
| 1301 | * though i_lock needed lest racing with wake_up_all(). | ||
| 1302 | */ | ||
| 1303 | spin_lock(&inode->i_lock); | ||
| 1304 | finish_wait(shmem_falloc_waitq, &shmem_fault_wait); | ||
| 1305 | spin_unlock(&inode->i_lock); | ||
| 1306 | return ret; | ||
| 1282 | } | 1307 | } |
| 1308 | spin_unlock(&inode->i_lock); | ||
| 1283 | } | 1309 | } |
| 1284 | 1310 | ||
| 1285 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); | 1311 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); |
| @@ -1774,13 +1800,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, | |||
| 1774 | 1800 | ||
| 1775 | mutex_lock(&inode->i_mutex); | 1801 | mutex_lock(&inode->i_mutex); |
| 1776 | 1802 | ||
| 1777 | shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE; | ||
| 1778 | |||
| 1779 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 1803 | if (mode & FALLOC_FL_PUNCH_HOLE) { |
| 1780 | struct address_space *mapping = file->f_mapping; | 1804 | struct address_space *mapping = file->f_mapping; |
| 1781 | loff_t unmap_start = round_up(offset, PAGE_SIZE); | 1805 | loff_t unmap_start = round_up(offset, PAGE_SIZE); |
| 1782 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; | 1806 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; |
| 1807 | DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); | ||
| 1783 | 1808 | ||
| 1809 | shmem_falloc.waitq = &shmem_falloc_waitq; | ||
| 1784 | shmem_falloc.start = unmap_start >> PAGE_SHIFT; | 1810 | shmem_falloc.start = unmap_start >> PAGE_SHIFT; |
| 1785 | shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; | 1811 | shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; |
| 1786 | spin_lock(&inode->i_lock); | 1812 | spin_lock(&inode->i_lock); |
| @@ -1792,8 +1818,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, | |||
| 1792 | 1 + unmap_end - unmap_start, 0); | 1818 | 1 + unmap_end - unmap_start, 0); |
| 1793 | shmem_truncate_range(inode, offset, offset + len - 1); | 1819 | shmem_truncate_range(inode, offset, offset + len - 1); |
| 1794 | /* No need to unmap again: hole-punching leaves COWed pages */ | 1820 | /* No need to unmap again: hole-punching leaves COWed pages */ |
| 1821 | |||
| 1822 | spin_lock(&inode->i_lock); | ||
| 1823 | inode->i_private = NULL; | ||
| 1824 | wake_up_all(&shmem_falloc_waitq); | ||
| 1825 | spin_unlock(&inode->i_lock); | ||
| 1795 | error = 0; | 1826 | error = 0; |
| 1796 | goto undone; | 1827 | goto out; |
| 1797 | } | 1828 | } |
| 1798 | 1829 | ||
| 1799 | /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ | 1830 | /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ |
| @@ -1809,6 +1840,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, | |||
| 1809 | goto out; | 1840 | goto out; |
| 1810 | } | 1841 | } |
| 1811 | 1842 | ||
| 1843 | shmem_falloc.waitq = NULL; | ||
| 1812 | shmem_falloc.start = start; | 1844 | shmem_falloc.start = start; |
| 1813 | shmem_falloc.next = start; | 1845 | shmem_falloc.next = start; |
| 1814 | shmem_falloc.nr_falloced = 0; | 1846 | shmem_falloc.nr_falloced = 0; |
diff --git a/mm/slab_common.c b/mm/slab_common.c index 735e01a0db6f..d31c4bacc6a2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
| @@ -55,7 +55,7 @@ static int kmem_cache_sanity_check(const char *name, size_t size) | |||
| 55 | continue; | 55 | continue; |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | #if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) | 58 | #if !defined(CONFIG_SLUB) |
| 59 | if (!strcmp(s->name, name)) { | 59 | if (!strcmp(s->name, name)) { |
| 60 | pr_err("%s (%s): Cache name already exists.\n", | 60 | pr_err("%s (%s): Cache name already exists.\n", |
| 61 | __func__, name); | 61 | __func__, name); |
diff --git a/mm/truncate.c b/mm/truncate.c index 6a78c814bebf..eda247307164 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -355,14 +355,16 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
| 355 | for ( ; ; ) { | 355 | for ( ; ; ) { |
| 356 | cond_resched(); | 356 | cond_resched(); |
| 357 | if (!pagevec_lookup_entries(&pvec, mapping, index, | 357 | if (!pagevec_lookup_entries(&pvec, mapping, index, |
| 358 | min(end - index, (pgoff_t)PAGEVEC_SIZE), | 358 | min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { |
| 359 | indices)) { | 359 | /* If all gone from start onwards, we're done */ |
| 360 | if (index == start) | 360 | if (index == start) |
| 361 | break; | 361 | break; |
| 362 | /* Otherwise restart to make sure all gone */ | ||
| 362 | index = start; | 363 | index = start; |
| 363 | continue; | 364 | continue; |
| 364 | } | 365 | } |
| 365 | if (index == start && indices[0] >= end) { | 366 | if (index == start && indices[0] >= end) { |
| 367 | /* All gone out of hole to be punched, we're done */ | ||
| 366 | pagevec_remove_exceptionals(&pvec); | 368 | pagevec_remove_exceptionals(&pvec); |
| 367 | pagevec_release(&pvec); | 369 | pagevec_release(&pvec); |
| 368 | break; | 370 | break; |
| @@ -373,8 +375,11 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
| 373 | 375 | ||
| 374 | /* We rely upon deletion not changing page->index */ | 376 | /* We rely upon deletion not changing page->index */ |
| 375 | index = indices[i]; | 377 | index = indices[i]; |
| 376 | if (index >= end) | 378 | if (index >= end) { |
| 379 | /* Restart punch to make sure all gone */ | ||
| 380 | index = start - 1; | ||
| 377 | break; | 381 | break; |
| 382 | } | ||
| 378 | 383 | ||
| 379 | if (radix_tree_exceptional_entry(page)) { | 384 | if (radix_tree_exceptional_entry(page)) { |
| 380 | clear_exceptional_entry(mapping, index, page); | 385 | clear_exceptional_entry(mapping, index, page); |
