Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 57
-rw-r--r-- | mm/hugetlb.c | 70
-rw-r--r-- | mm/ksm.c | 1
-rw-r--r-- | mm/memory-failure.c | 13
-rw-r--r-- | mm/memory.c | 3
-rw-r--r-- | mm/mempolicy.c | 48
-rw-r--r-- | mm/migrate.c | 7
-rw-r--r-- | mm/msync.c | 3
-rw-r--r-- | mm/nommu.c | 2
-rw-r--r-- | mm/page_alloc.c | 56
-rw-r--r-- | mm/rmap.c | 22
-rw-r--r-- | mm/shmem.c | 122
-rw-r--r-- | mm/slab.c | 90
-rw-r--r-- | mm/slab_common.c | 2
-rw-r--r-- | mm/slub.c | 6
-rw-r--r-- | mm/truncate.c | 11
16 files changed, 366 insertions, 147 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e60837dc785c..33514d88fef9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -941,6 +941,37 @@ unlock: | |||
941 | spin_unlock(ptl); | 941 | spin_unlock(ptl); |
942 | } | 942 | } |
943 | 943 | ||
944 | /* | ||
945 | * Save CONFIG_DEBUG_PAGEALLOC from faulting falsely on tail pages | ||
946 | * during copy_user_huge_page()'s copy_page_rep(): in the case when | ||
947 | * the source page gets split and a tail freed before copy completes. | ||
948 | * Called under pmd_lock of checked pmd, so safe from splitting itself. | ||
949 | */ | ||
950 | static void get_user_huge_page(struct page *page) | ||
951 | { | ||
952 | if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) { | ||
953 | struct page *endpage = page + HPAGE_PMD_NR; | ||
954 | |||
955 | atomic_add(HPAGE_PMD_NR, &page->_count); | ||
956 | while (++page < endpage) | ||
957 | get_huge_page_tail(page); | ||
958 | } else { | ||
959 | get_page(page); | ||
960 | } | ||
961 | } | ||
962 | |||
963 | static void put_user_huge_page(struct page *page) | ||
964 | { | ||
965 | if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) { | ||
966 | struct page *endpage = page + HPAGE_PMD_NR; | ||
967 | |||
968 | while (page < endpage) | ||
969 | put_page(page++); | ||
970 | } else { | ||
971 | put_page(page); | ||
972 | } | ||
973 | } | ||
974 | |||
944 | static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | 975 | static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, |
945 | struct vm_area_struct *vma, | 976 | struct vm_area_struct *vma, |
946 | unsigned long address, | 977 | unsigned long address, |
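A rough userspace model of what the paired helpers added in the hunk above are for, assuming only that a debug build wants a reference on every subpage so that a racing split cannot free tail pages while a copy loop is still reading them. All names, the subpage count and the flag below are illustrative stand-ins, not kernel code.

	/* toy model: one refcount per subpage of a huge page */
	#include <stdio.h>

	#define NR_SUBPAGES 512            /* stand-in for HPAGE_PMD_NR */
	static int debug_pagealloc = 1;    /* stand-in for CONFIG_DEBUG_PAGEALLOC */
	static int refcount[NR_SUBPAGES];

	static void get_user_huge(void)
	{
		if (debug_pagealloc) {
			for (int i = 0; i < NR_SUBPAGES; i++)
				refcount[i]++;          /* pin every subpage */
		} else {
			refcount[0]++;                  /* pin the head only */
		}
	}

	static void put_user_huge(void)
	{
		if (debug_pagealloc) {
			for (int i = 0; i < NR_SUBPAGES; i++)
				refcount[i]--;          /* must mirror the get side */
		} else {
			refcount[0]--;
		}
	}

	int main(void)
	{
		get_user_huge();
		/* ... copy_user_huge_page() would run here ... */
		put_user_huge();
		printf("head refcount back to %d\n", refcount[0]);
		return 0;
	}

The point of the pairing is that get and put must agree on the scheme: whichever references were taken at fault time are the ones dropped later, regardless of the debug configuration.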
@@ -1074,7 +1105,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1074 | ret |= VM_FAULT_WRITE; | 1105 | ret |= VM_FAULT_WRITE; |
1075 | goto out_unlock; | 1106 | goto out_unlock; |
1076 | } | 1107 | } |
1077 | get_page(page); | 1108 | get_user_huge_page(page); |
1078 | spin_unlock(ptl); | 1109 | spin_unlock(ptl); |
1079 | alloc: | 1110 | alloc: |
1080 | if (transparent_hugepage_enabled(vma) && | 1111 | if (transparent_hugepage_enabled(vma) && |
@@ -1095,7 +1126,7 @@ alloc: | |||
1095 | split_huge_page(page); | 1126 | split_huge_page(page); |
1096 | ret |= VM_FAULT_FALLBACK; | 1127 | ret |= VM_FAULT_FALLBACK; |
1097 | } | 1128 | } |
1098 | put_page(page); | 1129 | put_user_huge_page(page); |
1099 | } | 1130 | } |
1100 | count_vm_event(THP_FAULT_FALLBACK); | 1131 | count_vm_event(THP_FAULT_FALLBACK); |
1101 | goto out; | 1132 | goto out; |
@@ -1105,7 +1136,7 @@ alloc: | |||
1105 | put_page(new_page); | 1136 | put_page(new_page); |
1106 | if (page) { | 1137 | if (page) { |
1107 | split_huge_page(page); | 1138 | split_huge_page(page); |
1108 | put_page(page); | 1139 | put_user_huge_page(page); |
1109 | } else | 1140 | } else |
1110 | split_huge_page_pmd(vma, address, pmd); | 1141 | split_huge_page_pmd(vma, address, pmd); |
1111 | ret |= VM_FAULT_FALLBACK; | 1142 | ret |= VM_FAULT_FALLBACK; |
@@ -1127,7 +1158,7 @@ alloc: | |||
1127 | 1158 | ||
1128 | spin_lock(ptl); | 1159 | spin_lock(ptl); |
1129 | if (page) | 1160 | if (page) |
1130 | put_page(page); | 1161 | put_user_huge_page(page); |
1131 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { | 1162 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { |
1132 | spin_unlock(ptl); | 1163 | spin_unlock(ptl); |
1133 | mem_cgroup_uncharge_page(new_page); | 1164 | mem_cgroup_uncharge_page(new_page); |
@@ -2392,8 +2423,6 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2392 | pmd = mm_find_pmd(mm, address); | 2423 | pmd = mm_find_pmd(mm, address); |
2393 | if (!pmd) | 2424 | if (!pmd) |
2394 | goto out; | 2425 | goto out; |
2395 | if (pmd_trans_huge(*pmd)) | ||
2396 | goto out; | ||
2397 | 2426 | ||
2398 | anon_vma_lock_write(vma->anon_vma); | 2427 | anon_vma_lock_write(vma->anon_vma); |
2399 | 2428 | ||
@@ -2492,8 +2521,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2492 | pmd = mm_find_pmd(mm, address); | 2521 | pmd = mm_find_pmd(mm, address); |
2493 | if (!pmd) | 2522 | if (!pmd) |
2494 | goto out; | 2523 | goto out; |
2495 | if (pmd_trans_huge(*pmd)) | ||
2496 | goto out; | ||
2497 | 2524 | ||
2498 | memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); | 2525 | memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); |
2499 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 2526 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
@@ -2846,12 +2873,22 @@ void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, | |||
2846 | static void split_huge_page_address(struct mm_struct *mm, | 2873 | static void split_huge_page_address(struct mm_struct *mm, |
2847 | unsigned long address) | 2874 | unsigned long address) |
2848 | { | 2875 | { |
2876 | pgd_t *pgd; | ||
2877 | pud_t *pud; | ||
2849 | pmd_t *pmd; | 2878 | pmd_t *pmd; |
2850 | 2879 | ||
2851 | VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); | 2880 | VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); |
2852 | 2881 | ||
2853 | pmd = mm_find_pmd(mm, address); | 2882 | pgd = pgd_offset(mm, address); |
2854 | if (!pmd) | 2883 | if (!pgd_present(*pgd)) |
2884 | return; | ||
2885 | |||
2886 | pud = pud_offset(pgd, address); | ||
2887 | if (!pud_present(*pud)) | ||
2888 | return; | ||
2889 | |||
2890 | pmd = pmd_offset(pud, address); | ||
2891 | if (!pmd_present(*pmd)) | ||
2855 | return; | 2892 | return; |
2856 | /* | 2893 | /* |
2857 | * Caller holds the mmap_sem write mode, so a huge pmd cannot | 2894 | * Caller holds the mmap_sem write mode, so a huge pmd cannot |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 226910cb7c9b..9221c02ed9e2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2520,6 +2520,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, | |||
2520 | update_mmu_cache(vma, address, ptep); | 2520 | update_mmu_cache(vma, address, ptep); |
2521 | } | 2521 | } |
2522 | 2522 | ||
2523 | static int is_hugetlb_entry_migration(pte_t pte) | ||
2524 | { | ||
2525 | swp_entry_t swp; | ||
2526 | |||
2527 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2528 | return 0; | ||
2529 | swp = pte_to_swp_entry(pte); | ||
2530 | if (non_swap_entry(swp) && is_migration_entry(swp)) | ||
2531 | return 1; | ||
2532 | else | ||
2533 | return 0; | ||
2534 | } | ||
2535 | |||
2536 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | ||
2537 | { | ||
2538 | swp_entry_t swp; | ||
2539 | |||
2540 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2541 | return 0; | ||
2542 | swp = pte_to_swp_entry(pte); | ||
2543 | if (non_swap_entry(swp) && is_hwpoison_entry(swp)) | ||
2544 | return 1; | ||
2545 | else | ||
2546 | return 0; | ||
2547 | } | ||
2523 | 2548 | ||
2524 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | 2549 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, |
2525 | struct vm_area_struct *vma) | 2550 | struct vm_area_struct *vma) |
@@ -2559,7 +2584,24 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
2559 | dst_ptl = huge_pte_lock(h, dst, dst_pte); | 2584 | dst_ptl = huge_pte_lock(h, dst, dst_pte); |
2560 | src_ptl = huge_pte_lockptr(h, src, src_pte); | 2585 | src_ptl = huge_pte_lockptr(h, src, src_pte); |
2561 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); | 2586 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); |
2562 | if (!huge_pte_none(huge_ptep_get(src_pte))) { | 2587 | entry = huge_ptep_get(src_pte); |
2588 | if (huge_pte_none(entry)) { /* skip none entry */ | ||
2589 | ; | ||
2590 | } else if (unlikely(is_hugetlb_entry_migration(entry) || | ||
2591 | is_hugetlb_entry_hwpoisoned(entry))) { | ||
2592 | swp_entry_t swp_entry = pte_to_swp_entry(entry); | ||
2593 | |||
2594 | if (is_write_migration_entry(swp_entry) && cow) { | ||
2595 | /* | ||
2596 | * COW mappings require pages in both | ||
2597 | * parent and child to be set to read. | ||
2598 | */ | ||
2599 | make_migration_entry_read(&swp_entry); | ||
2600 | entry = swp_entry_to_pte(swp_entry); | ||
2601 | set_huge_pte_at(src, addr, src_pte, entry); | ||
2602 | } | ||
2603 | set_huge_pte_at(dst, addr, dst_pte, entry); | ||
2604 | } else { | ||
2563 | if (cow) | 2605 | if (cow) |
2564 | huge_ptep_set_wrprotect(src, addr, src_pte); | 2606 | huge_ptep_set_wrprotect(src, addr, src_pte); |
2565 | entry = huge_ptep_get(src_pte); | 2607 | entry = huge_ptep_get(src_pte); |
@@ -2578,32 +2620,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
2578 | return ret; | 2620 | return ret; |
2579 | } | 2621 | } |
2580 | 2622 | ||
2581 | static int is_hugetlb_entry_migration(pte_t pte) | ||
2582 | { | ||
2583 | swp_entry_t swp; | ||
2584 | |||
2585 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2586 | return 0; | ||
2587 | swp = pte_to_swp_entry(pte); | ||
2588 | if (non_swap_entry(swp) && is_migration_entry(swp)) | ||
2589 | return 1; | ||
2590 | else | ||
2591 | return 0; | ||
2592 | } | ||
2593 | |||
2594 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | ||
2595 | { | ||
2596 | swp_entry_t swp; | ||
2597 | |||
2598 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2599 | return 0; | ||
2600 | swp = pte_to_swp_entry(pte); | ||
2601 | if (non_swap_entry(swp) && is_hwpoison_entry(swp)) | ||
2602 | return 1; | ||
2603 | else | ||
2604 | return 0; | ||
2605 | } | ||
2606 | |||
2607 | void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | 2623 | void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, |
2608 | unsigned long start, unsigned long end, | 2624 | unsigned long start, unsigned long end, |
2609 | struct page *ref_page) | 2625 | struct page *ref_page) |
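A minimal standalone sketch of the classification the new copy_hugetlb_page_range() path above performs, under the assumption that a write migration entry must be made read-only in both parent and child for COW to work. The enum and struct are simplified stand-ins, not the kernel's pte/swp_entry types.

	#include <stdbool.h>
	#include <stdio.h>

	enum kind { NONE, PRESENT, MIGRATION, HWPOISON };

	struct entry {
		enum kind kind;
		bool writable;
	};

	static struct entry copy_entry(struct entry *src, bool cow)
	{
		struct entry dst = *src;

		switch (src->kind) {
		case NONE:
			break;                      /* nothing to copy */
		case MIGRATION:
			if (cow && src->writable) { /* both sides become read-only */
				src->writable = false;
				dst.writable = false;
			}
			break;
		case HWPOISON:
			break;                      /* propagate the poison marker */
		case PRESENT:
			if (cow) {                  /* write-protect parent and child */
				src->writable = false;
				dst.writable = false;
			}
			break;
		}
		return dst;
	}

	int main(void)
	{
		struct entry src = { MIGRATION, true };
		struct entry dst = copy_entry(&src, true);

		printf("src writable=%d dst writable=%d\n",
		       src.writable, dst.writable);
		return 0;
	}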
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -945,7 +945,6 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
945 | pmd = mm_find_pmd(mm, addr); | 945 | pmd = mm_find_pmd(mm, addr); |
946 | if (!pmd) | 946 | if (!pmd) |
947 | goto out; | 947 | goto out; |
948 | BUG_ON(pmd_trans_huge(*pmd)); | ||
949 | 948 | ||
950 | mmun_start = addr; | 949 | mmun_start = addr; |
951 | mmun_end = addr + PAGE_SIZE; | 950 | mmun_end = addr + PAGE_SIZE; |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index cd8989c1027e..7211a73ba14d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -435,7 +435,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, | |||
435 | if (av == NULL) /* Not actually mapped anymore */ | 435 | if (av == NULL) /* Not actually mapped anymore */ |
436 | return; | 436 | return; |
437 | 437 | ||
438 | pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 438 | pgoff = page_to_pgoff(page); |
439 | read_lock(&tasklist_lock); | 439 | read_lock(&tasklist_lock); |
440 | for_each_process (tsk) { | 440 | for_each_process (tsk) { |
441 | struct anon_vma_chain *vmac; | 441 | struct anon_vma_chain *vmac; |
@@ -469,7 +469,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, | |||
469 | mutex_lock(&mapping->i_mmap_mutex); | 469 | mutex_lock(&mapping->i_mmap_mutex); |
470 | read_lock(&tasklist_lock); | 470 | read_lock(&tasklist_lock); |
471 | for_each_process(tsk) { | 471 | for_each_process(tsk) { |
472 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 472 | pgoff_t pgoff = page_to_pgoff(page); |
473 | struct task_struct *t = task_early_kill(tsk, force_early); | 473 | struct task_struct *t = task_early_kill(tsk, force_early); |
474 | 474 | ||
475 | if (!t) | 475 | if (!t) |
@@ -895,7 +895,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
895 | struct page *hpage = *hpagep; | 895 | struct page *hpage = *hpagep; |
896 | struct page *ppage; | 896 | struct page *ppage; |
897 | 897 | ||
898 | if (PageReserved(p) || PageSlab(p)) | 898 | if (PageReserved(p) || PageSlab(p) || !PageLRU(p)) |
899 | return SWAP_SUCCESS; | 899 | return SWAP_SUCCESS; |
900 | 900 | ||
901 | /* | 901 | /* |
@@ -1159,9 +1159,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1159 | action_result(pfn, "free buddy, 2nd try", DELAYED); | 1159 | action_result(pfn, "free buddy, 2nd try", DELAYED); |
1160 | return 0; | 1160 | return 0; |
1161 | } | 1161 | } |
1162 | action_result(pfn, "non LRU", IGNORED); | ||
1163 | put_page(p); | ||
1164 | return -EBUSY; | ||
1165 | } | 1162 | } |
1166 | } | 1163 | } |
1167 | 1164 | ||
@@ -1194,6 +1191,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1194 | return 0; | 1191 | return 0; |
1195 | } | 1192 | } |
1196 | 1193 | ||
1194 | if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p)) | ||
1195 | goto identify_page_state; | ||
1196 | |||
1197 | /* | 1197 | /* |
1198 | * For error on the tail page, we should set PG_hwpoison | 1198 | * For error on the tail page, we should set PG_hwpoison |
1199 | * on the head page to show that the hugepage is hwpoisoned | 1199 | * on the head page to show that the hugepage is hwpoisoned |
@@ -1243,6 +1243,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) | |||
1243 | goto out; | 1243 | goto out; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | identify_page_state: | ||
1246 | res = -EBUSY; | 1247 | res = -EBUSY; |
1247 | /* | 1248 | /* |
1248 | * The first check uses the current page flags which may not have any | 1249 | * The first check uses the current page flags which may not have any |
diff --git a/mm/memory.c b/mm/memory.c
index d67fd9fcf1f2..7e8d8205b610 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2882,7 +2882,8 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2882 | * if page by the offset is not ready to be mapped (cold cache or | 2882 | * if page by the offset is not ready to be mapped (cold cache or |
2883 | * something). | 2883 | * something). |
2884 | */ | 2884 | */ |
2885 | if (vma->vm_ops->map_pages && fault_around_pages() > 1) { | 2885 | if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && |
2886 | fault_around_pages() > 1) { | ||
2886 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 2887 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
2887 | do_fault_around(vma, address, pte, pgoff, flags); | 2888 | do_fault_around(vma, address, pte, pgoff, flags); |
2888 | if (!pte_same(*pte, orig_pte)) | 2889 | if (!pte_same(*pte, orig_pte)) |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 284974230459..8f5330d74f47 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -656,19 +656,18 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, | |||
656 | * @nodes and @flags,) it's isolated and queued to the pagelist which is | 656 | * @nodes and @flags,) it's isolated and queued to the pagelist which is |
657 | * passed via @private.) | 657 | * passed via @private.) |
658 | */ | 658 | */ |
659 | static struct vm_area_struct * | 659 | static int |
660 | queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | 660 | queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, |
661 | const nodemask_t *nodes, unsigned long flags, void *private) | 661 | const nodemask_t *nodes, unsigned long flags, void *private) |
662 | { | 662 | { |
663 | int err; | 663 | int err = 0; |
664 | struct vm_area_struct *first, *vma, *prev; | 664 | struct vm_area_struct *vma, *prev; |
665 | |||
666 | 665 | ||
667 | first = find_vma(mm, start); | 666 | vma = find_vma(mm, start); |
668 | if (!first) | 667 | if (!vma) |
669 | return ERR_PTR(-EFAULT); | 668 | return -EFAULT; |
670 | prev = NULL; | 669 | prev = NULL; |
671 | for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { | 670 | for (; vma && vma->vm_start < end; vma = vma->vm_next) { |
672 | unsigned long endvma = vma->vm_end; | 671 | unsigned long endvma = vma->vm_end; |
673 | 672 | ||
674 | if (endvma > end) | 673 | if (endvma > end) |
@@ -678,9 +677,9 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
678 | 677 | ||
679 | if (!(flags & MPOL_MF_DISCONTIG_OK)) { | 678 | if (!(flags & MPOL_MF_DISCONTIG_OK)) { |
680 | if (!vma->vm_next && vma->vm_end < end) | 679 | if (!vma->vm_next && vma->vm_end < end) |
681 | return ERR_PTR(-EFAULT); | 680 | return -EFAULT; |
682 | if (prev && prev->vm_end < vma->vm_start) | 681 | if (prev && prev->vm_end < vma->vm_start) |
683 | return ERR_PTR(-EFAULT); | 682 | return -EFAULT; |
684 | } | 683 | } |
685 | 684 | ||
686 | if (flags & MPOL_MF_LAZY) { | 685 | if (flags & MPOL_MF_LAZY) { |
@@ -694,15 +693,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
694 | 693 | ||
695 | err = queue_pages_pgd_range(vma, start, endvma, nodes, | 694 | err = queue_pages_pgd_range(vma, start, endvma, nodes, |
696 | flags, private); | 695 | flags, private); |
697 | if (err) { | 696 | if (err) |
698 | first = ERR_PTR(err); | ||
699 | break; | 697 | break; |
700 | } | ||
701 | } | 698 | } |
702 | next: | 699 | next: |
703 | prev = vma; | 700 | prev = vma; |
704 | } | 701 | } |
705 | return first; | 702 | return err; |
706 | } | 703 | } |
707 | 704 | ||
708 | /* | 705 | /* |
@@ -1156,16 +1153,17 @@ out: | |||
1156 | 1153 | ||
1157 | /* | 1154 | /* |
1158 | * Allocate a new page for page migration based on vma policy. | 1155 | * Allocate a new page for page migration based on vma policy. |
1159 | * Start assuming that page is mapped by vma pointed to by @private. | 1156 | * Start by assuming the page is mapped by the same vma as contains @start. |
1160 | * Search forward from there, if not. N.B., this assumes that the | 1157 | * Search forward from there, if not. N.B., this assumes that the |
1161 | * list of pages handed to migrate_pages()--which is how we get here-- | 1158 | * list of pages handed to migrate_pages()--which is how we get here-- |
1162 | * is in virtual address order. | 1159 | * is in virtual address order. |
1163 | */ | 1160 | */ |
1164 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) | 1161 | static struct page *new_page(struct page *page, unsigned long start, int **x) |
1165 | { | 1162 | { |
1166 | struct vm_area_struct *vma = (struct vm_area_struct *)private; | 1163 | struct vm_area_struct *vma; |
1167 | unsigned long uninitialized_var(address); | 1164 | unsigned long uninitialized_var(address); |
1168 | 1165 | ||
1166 | vma = find_vma(current->mm, start); | ||
1169 | while (vma) { | 1167 | while (vma) { |
1170 | address = page_address_in_vma(page, vma); | 1168 | address = page_address_in_vma(page, vma); |
1171 | if (address != -EFAULT) | 1169 | if (address != -EFAULT) |
@@ -1195,7 +1193,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, | |||
1195 | return -ENOSYS; | 1193 | return -ENOSYS; |
1196 | } | 1194 | } |
1197 | 1195 | ||
1198 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) | 1196 | static struct page *new_page(struct page *page, unsigned long start, int **x) |
1199 | { | 1197 | { |
1200 | return NULL; | 1198 | return NULL; |
1201 | } | 1199 | } |
@@ -1205,7 +1203,6 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1205 | unsigned short mode, unsigned short mode_flags, | 1203 | unsigned short mode, unsigned short mode_flags, |
1206 | nodemask_t *nmask, unsigned long flags) | 1204 | nodemask_t *nmask, unsigned long flags) |
1207 | { | 1205 | { |
1208 | struct vm_area_struct *vma; | ||
1209 | struct mm_struct *mm = current->mm; | 1206 | struct mm_struct *mm = current->mm; |
1210 | struct mempolicy *new; | 1207 | struct mempolicy *new; |
1211 | unsigned long end; | 1208 | unsigned long end; |
@@ -1271,11 +1268,9 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1271 | if (err) | 1268 | if (err) |
1272 | goto mpol_out; | 1269 | goto mpol_out; |
1273 | 1270 | ||
1274 | vma = queue_pages_range(mm, start, end, nmask, | 1271 | err = queue_pages_range(mm, start, end, nmask, |
1275 | flags | MPOL_MF_INVERT, &pagelist); | 1272 | flags | MPOL_MF_INVERT, &pagelist); |
1276 | 1273 | if (!err) | |
1277 | err = PTR_ERR(vma); /* maybe ... */ | ||
1278 | if (!IS_ERR(vma)) | ||
1279 | err = mbind_range(mm, start, end, new); | 1274 | err = mbind_range(mm, start, end, new); |
1280 | 1275 | ||
1281 | if (!err) { | 1276 | if (!err) { |
@@ -1283,9 +1278,8 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1283 | 1278 | ||
1284 | if (!list_empty(&pagelist)) { | 1279 | if (!list_empty(&pagelist)) { |
1285 | WARN_ON_ONCE(flags & MPOL_MF_LAZY); | 1280 | WARN_ON_ONCE(flags & MPOL_MF_LAZY); |
1286 | nr_failed = migrate_pages(&pagelist, new_vma_page, | 1281 | nr_failed = migrate_pages(&pagelist, new_page, NULL, |
1287 | NULL, (unsigned long)vma, | 1282 | start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND); |
1288 | MIGRATE_SYNC, MR_MEMPOLICY_MBIND); | ||
1289 | if (nr_failed) | 1283 | if (nr_failed) |
1290 | putback_movable_pages(&pagelist); | 1284 | putback_movable_pages(&pagelist); |
1291 | } | 1285 | } |
@@ -2145,7 +2139,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) | |||
2145 | } else | 2139 | } else |
2146 | *new = *old; | 2140 | *new = *old; |
2147 | 2141 | ||
2148 | rcu_read_lock(); | ||
2149 | if (current_cpuset_is_being_rebound()) { | 2142 | if (current_cpuset_is_being_rebound()) { |
2150 | nodemask_t mems = cpuset_mems_allowed(current); | 2143 | nodemask_t mems = cpuset_mems_allowed(current); |
2151 | if (new->flags & MPOL_F_REBINDING) | 2144 | if (new->flags & MPOL_F_REBINDING) |
@@ -2153,7 +2146,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) | |||
2153 | else | 2146 | else |
2154 | mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE); | 2147 | mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE); |
2155 | } | 2148 | } |
2156 | rcu_read_unlock(); | ||
2157 | atomic_set(&new->refcnt, 1); | 2149 | atomic_set(&new->refcnt, 1); |
2158 | return new; | 2150 | return new; |
2159 | } | 2151 | } |
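A generic illustration of the queue_pages_range() return-convention change earlier in this mempolicy.c diff, using simplified stand-ins for ERR_PTR()/IS_ERR() rather than the real linux/err.h macros; -14 plays the role of -EFAULT and the struct is a dummy.

	#include <stdio.h>

	#define MAX_ERRNO 4095
	#define ERR_PTR(err)  ((void *)(long)(err))
	#define PTR_ERR(ptr)  ((long)(ptr))
	#define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

	struct vma { int dummy; };

	/* old style: success returns a vma, failure returns ERR_PTR(-EFAULT) */
	static struct vma *queue_old(int fail)
	{
		static struct vma v;
		return fail ? ERR_PTR(-14) : &v;
	}

	/* new style: success is 0, failure is a negative errno */
	static int queue_new(int fail)
	{
		return fail ? -14 : 0;
	}

	int main(void)
	{
		struct vma *vma = queue_old(1);
		if (IS_ERR(vma))
			printf("old style: err=%ld\n", PTR_ERR(vma));

		int err = queue_new(1);
		if (err)
			printf("new style: err=%d\n", err);
		return 0;
	}

Returning a plain int also means the caller no longer holds a vma pointer across mbind_range(); the new_page() callback instead re-walks the tree with find_vma() from the saved start address.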
diff --git a/mm/migrate.c b/mm/migrate.c
index 63f0cd559999..be6dbf995c0c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -120,8 +120,6 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
120 | pmd = mm_find_pmd(mm, addr); | 120 | pmd = mm_find_pmd(mm, addr); |
121 | if (!pmd) | 121 | if (!pmd) |
122 | goto out; | 122 | goto out; |
123 | if (pmd_trans_huge(*pmd)) | ||
124 | goto out; | ||
125 | 123 | ||
126 | ptep = pte_offset_map(pmd, addr); | 124 | ptep = pte_offset_map(pmd, addr); |
127 | 125 | ||
@@ -990,9 +988,10 @@ out: | |||
990 | * it. Otherwise, putback_lru_page() will drop the reference grabbed | 988 | * it. Otherwise, putback_lru_page() will drop the reference grabbed |
991 | * during isolation. | 989 | * during isolation. |
992 | */ | 990 | */ |
993 | if (rc != MIGRATEPAGE_SUCCESS && put_new_page) | 991 | if (rc != MIGRATEPAGE_SUCCESS && put_new_page) { |
992 | ClearPageSwapBacked(newpage); | ||
994 | put_new_page(newpage, private); | 993 | put_new_page(newpage, private); |
995 | else | 994 | } else |
996 | putback_lru_page(newpage); | 995 | putback_lru_page(newpage); |
997 | 996 | ||
998 | if (result) { | 997 | if (result) { |
diff --git a/mm/msync.c b/mm/msync.c
index a5c673669ca6..992a1673d488 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -78,7 +78,8 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) | |||
78 | goto out_unlock; | 78 | goto out_unlock; |
79 | } | 79 | } |
80 | file = vma->vm_file; | 80 | file = vma->vm_file; |
81 | fstart = start + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); | 81 | fstart = (start - vma->vm_start) + |
82 | ((loff_t)vma->vm_pgoff << PAGE_SHIFT); | ||
82 | fend = fstart + (min(end, vma->vm_end) - start) - 1; | 83 | fend = fstart + (min(end, vma->vm_end) - start) - 1; |
83 | start = vma->vm_end; | 84 | start = vma->vm_end; |
84 | if ((flags & MS_SYNC) && file && | 85 | if ((flags & MS_SYNC) && file && |
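A self-contained illustration of the msync() offset fix above: the flushed file range must be relative to the start of the mapping, not to the absolute userspace address. The addresses and file offset below are made-up example values.

	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		unsigned long vm_start = 0x7f0000000000UL;  /* mapping start (example) */
		unsigned long vm_pgoff = 16;                /* mapped from file offset 64KiB */
		unsigned long start    = vm_start + 0x3000; /* msync() begins 3 pages in */

		long long buggy = start + ((long long)vm_pgoff << PAGE_SHIFT);
		long long fixed = (start - vm_start) + ((long long)vm_pgoff << PAGE_SHIFT);

		printf("buggy fstart = %lld\n", buggy);  /* huge bogus file offset */
		printf("fixed fstart = %lld\n", fixed);  /* 64KiB + 12KiB = 77824 bytes */
		return 0;
	}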
diff --git a/mm/nommu.c b/mm/nommu.c
index b78e3a8f5ee7..4a852f6c5709 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -786,7 +786,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) | |||
786 | for (i = 0; i < VMACACHE_SIZE; i++) { | 786 | for (i = 0; i < VMACACHE_SIZE; i++) { |
787 | /* if the vma is cached, invalidate the entire cache */ | 787 | /* if the vma is cached, invalidate the entire cache */ |
788 | if (curr->vmacache[i] == vma) { | 788 | if (curr->vmacache[i] == vma) { |
789 | vmacache_invalidate(curr->mm); | 789 | vmacache_invalidate(mm); |
790 | break; | 790 | break; |
791 | } | 791 | } |
792 | } | 792 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4f59fa29eda8..0ea758b898fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@ | |||
69 | 69 | ||
70 | /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ | 70 | /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ |
71 | static DEFINE_MUTEX(pcp_batch_high_lock); | 71 | static DEFINE_MUTEX(pcp_batch_high_lock); |
72 | #define MIN_PERCPU_PAGELIST_FRACTION (8) | ||
72 | 73 | ||
73 | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID | 74 | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID |
74 | DEFINE_PER_CPU(int, numa_node); | 75 | DEFINE_PER_CPU(int, numa_node); |
@@ -815,9 +816,21 @@ void __init init_cma_reserved_pageblock(struct page *page) | |||
815 | set_page_count(p, 0); | 816 | set_page_count(p, 0); |
816 | } while (++p, --i); | 817 | } while (++p, --i); |
817 | 818 | ||
818 | set_page_refcounted(page); | ||
819 | set_pageblock_migratetype(page, MIGRATE_CMA); | 819 | set_pageblock_migratetype(page, MIGRATE_CMA); |
820 | __free_pages(page, pageblock_order); | 820 | |
821 | if (pageblock_order >= MAX_ORDER) { | ||
822 | i = pageblock_nr_pages; | ||
823 | p = page; | ||
824 | do { | ||
825 | set_page_refcounted(p); | ||
826 | __free_pages(p, MAX_ORDER - 1); | ||
827 | p += MAX_ORDER_NR_PAGES; | ||
828 | } while (i -= MAX_ORDER_NR_PAGES); | ||
829 | } else { | ||
830 | set_page_refcounted(page); | ||
831 | __free_pages(page, pageblock_order); | ||
832 | } | ||
833 | |||
821 | adjust_managed_page_count(page, pageblock_nr_pages); | 834 | adjust_managed_page_count(page, pageblock_nr_pages); |
822 | } | 835 | } |
823 | #endif | 836 | #endif |
@@ -4145,7 +4158,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) | |||
4145 | memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) | 4158 | memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) |
4146 | #endif | 4159 | #endif |
4147 | 4160 | ||
4148 | static int __meminit zone_batchsize(struct zone *zone) | 4161 | static int zone_batchsize(struct zone *zone) |
4149 | { | 4162 | { |
4150 | #ifdef CONFIG_MMU | 4163 | #ifdef CONFIG_MMU |
4151 | int batch; | 4164 | int batch; |
@@ -4261,8 +4274,8 @@ static void pageset_set_high(struct per_cpu_pageset *p, | |||
4261 | pageset_update(&p->pcp, high, batch); | 4274 | pageset_update(&p->pcp, high, batch); |
4262 | } | 4275 | } |
4263 | 4276 | ||
4264 | static void __meminit pageset_set_high_and_batch(struct zone *zone, | 4277 | static void pageset_set_high_and_batch(struct zone *zone, |
4265 | struct per_cpu_pageset *pcp) | 4278 | struct per_cpu_pageset *pcp) |
4266 | { | 4279 | { |
4267 | if (percpu_pagelist_fraction) | 4280 | if (percpu_pagelist_fraction) |
4268 | pageset_set_high(pcp, | 4281 | pageset_set_high(pcp, |
@@ -5881,23 +5894,38 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write, | |||
5881 | void __user *buffer, size_t *length, loff_t *ppos) | 5894 | void __user *buffer, size_t *length, loff_t *ppos) |
5882 | { | 5895 | { |
5883 | struct zone *zone; | 5896 | struct zone *zone; |
5884 | unsigned int cpu; | 5897 | int old_percpu_pagelist_fraction; |
5885 | int ret; | 5898 | int ret; |
5886 | 5899 | ||
5900 | mutex_lock(&pcp_batch_high_lock); | ||
5901 | old_percpu_pagelist_fraction = percpu_pagelist_fraction; | ||
5902 | |||
5887 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); | 5903 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); |
5888 | if (!write || (ret < 0)) | 5904 | if (!write || ret < 0) |
5889 | return ret; | 5905 | goto out; |
5906 | |||
5907 | /* Sanity checking to avoid pcp imbalance */ | ||
5908 | if (percpu_pagelist_fraction && | ||
5909 | percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) { | ||
5910 | percpu_pagelist_fraction = old_percpu_pagelist_fraction; | ||
5911 | ret = -EINVAL; | ||
5912 | goto out; | ||
5913 | } | ||
5914 | |||
5915 | /* No change? */ | ||
5916 | if (percpu_pagelist_fraction == old_percpu_pagelist_fraction) | ||
5917 | goto out; | ||
5890 | 5918 | ||
5891 | mutex_lock(&pcp_batch_high_lock); | ||
5892 | for_each_populated_zone(zone) { | 5919 | for_each_populated_zone(zone) { |
5893 | unsigned long high; | 5920 | unsigned int cpu; |
5894 | high = zone->managed_pages / percpu_pagelist_fraction; | 5921 | |
5895 | for_each_possible_cpu(cpu) | 5922 | for_each_possible_cpu(cpu) |
5896 | pageset_set_high(per_cpu_ptr(zone->pageset, cpu), | 5923 | pageset_set_high_and_batch(zone, |
5897 | high); | 5924 | per_cpu_ptr(zone->pageset, cpu)); |
5898 | } | 5925 | } |
5926 | out: | ||
5899 | mutex_unlock(&pcp_batch_high_lock); | 5927 | mutex_unlock(&pcp_batch_high_lock); |
5900 | return 0; | 5928 | return ret; |
5901 | } | 5929 | } |
5902 | 5930 | ||
5903 | int hashdist = HASHDIST_DEFAULT; | 5931 | int hashdist = HASHDIST_DEFAULT; |
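A rough userspace model of the percpu_pagelist_fraction sysctl changes above: a non-zero fraction below MIN_PERCPU_PAGELIST_FRACTION (8) is now rejected, because pcp->high = managed_pages / fraction would otherwise grow far too large. The zone size is a made-up example and -1 stands in for -EINVAL.

	#include <stdio.h>

	#define MIN_PERCPU_PAGELIST_FRACTION 8

	static int set_fraction(unsigned long managed_pages, int fraction)
	{
		if (fraction && fraction < MIN_PERCPU_PAGELIST_FRACTION)
			return -1;                       /* -EINVAL in the kernel */

		if (fraction)
			printf("fraction=%d -> pcp high = %lu pages\n",
			       fraction, managed_pages / fraction);
		else
			printf("fraction=0 -> default high/batch heuristics\n");
		return 0;
	}

	int main(void)
	{
		unsigned long zone_pages = 1UL << 20;    /* a 4GiB zone of 4KiB pages */

		set_fraction(zone_pages, 8);             /* accepted: high = 131072 */
		if (set_fraction(zone_pages, 1) < 0)     /* rejected by the new check */
			printf("fraction=1 rejected (< %d)\n",
			       MIN_PERCPU_PAGELIST_FRACTION);
		return 0;
	}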
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -517,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
517 | static inline unsigned long | 517 | static inline unsigned long |
518 | __vma_address(struct page *page, struct vm_area_struct *vma) | 518 | __vma_address(struct page *page, struct vm_area_struct *vma) |
519 | { | 519 | { |
520 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 520 | pgoff_t pgoff = page_to_pgoff(page); |
521 | |||
522 | if (unlikely(is_vm_hugetlb_page(vma))) | ||
523 | pgoff = page->index << huge_page_order(page_hstate(page)); | ||
524 | |||
525 | return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); | 521 | return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); |
526 | } | 522 | } |
527 | 523 | ||
@@ -569,6 +565,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) | |||
569 | pgd_t *pgd; | 565 | pgd_t *pgd; |
570 | pud_t *pud; | 566 | pud_t *pud; |
571 | pmd_t *pmd = NULL; | 567 | pmd_t *pmd = NULL; |
568 | pmd_t pmde; | ||
572 | 569 | ||
573 | pgd = pgd_offset(mm, address); | 570 | pgd = pgd_offset(mm, address); |
574 | if (!pgd_present(*pgd)) | 571 | if (!pgd_present(*pgd)) |
@@ -579,7 +576,13 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) | |||
579 | goto out; | 576 | goto out; |
580 | 577 | ||
581 | pmd = pmd_offset(pud, address); | 578 | pmd = pmd_offset(pud, address); |
582 | if (!pmd_present(*pmd)) | 579 | /* |
580 | * Some THP functions use the sequence pmdp_clear_flush(), set_pmd_at() | ||
581 | * without holding anon_vma lock for write. So when looking for a | ||
582 | * genuine pmde (in which to find pte), test present and !THP together. | ||
583 | */ | ||
584 | pmde = ACCESS_ONCE(*pmd); | ||
585 | if (!pmd_present(pmde) || pmd_trans_huge(pmde)) | ||
583 | pmd = NULL; | 586 | pmd = NULL; |
584 | out: | 587 | out: |
585 | return pmd; | 588 | return pmd; |
@@ -615,9 +618,6 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm, | |||
615 | if (!pmd) | 618 | if (!pmd) |
616 | return NULL; | 619 | return NULL; |
617 | 620 | ||
618 | if (pmd_trans_huge(*pmd)) | ||
619 | return NULL; | ||
620 | |||
621 | pte = pte_offset_map(pmd, address); | 621 | pte = pte_offset_map(pmd, address); |
622 | /* Make a quick check before getting the lock */ | 622 | /* Make a quick check before getting the lock */ |
623 | if (!sync && !pte_present(*pte)) { | 623 | if (!sync && !pte_present(*pte)) { |
@@ -1635,7 +1635,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, | |||
1635 | static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) | 1635 | static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) |
1636 | { | 1636 | { |
1637 | struct anon_vma *anon_vma; | 1637 | struct anon_vma *anon_vma; |
1638 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1638 | pgoff_t pgoff = page_to_pgoff(page); |
1639 | struct anon_vma_chain *avc; | 1639 | struct anon_vma_chain *avc; |
1640 | int ret = SWAP_AGAIN; | 1640 | int ret = SWAP_AGAIN; |
1641 | 1641 | ||
@@ -1676,7 +1676,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) | |||
1676 | static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) | 1676 | static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) |
1677 | { | 1677 | { |
1678 | struct address_space *mapping = page->mapping; | 1678 | struct address_space *mapping = page->mapping; |
1679 | pgoff_t pgoff = page->index << compound_order(page); | 1679 | pgoff_t pgoff = page_to_pgoff(page); |
1680 | struct vm_area_struct *vma; | 1680 | struct vm_area_struct *vma; |
1681 | int ret = SWAP_AGAIN; | 1681 | int ret = SWAP_AGAIN; |
1682 | 1682 | ||
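A self-contained illustration of the linear-address arithmetic that page_to_pgoff() now feeds into __vma_address() earlier in this rmap.c diff; all numbers are hypothetical.

	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		unsigned long vm_start = 0x400000;  /* mapping starts here */
		unsigned long vm_pgoff = 10;        /* ...at page 10 of the file */
		unsigned long pgoff    = 14;        /* page we are looking for */

		unsigned long addr = vm_start + ((pgoff - vm_pgoff) << PAGE_SHIFT);
		printf("page %lu maps at 0x%lx\n", pgoff, addr); /* 0x404000 */
		return 0;
	}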
diff --git a/mm/shmem.c b/mm/shmem.c
index f484c276e994..af68b15a8fc1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt; | |||
80 | #define SHORT_SYMLINK_LEN 128 | 80 | #define SHORT_SYMLINK_LEN 128 |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * shmem_fallocate and shmem_writepage communicate via inode->i_private | 83 | * shmem_fallocate communicates with shmem_fault or shmem_writepage via |
84 | * (with i_mutex making sure that it has only one user at a time): | 84 | * inode->i_private (with i_mutex making sure that it has only one user at |
85 | * we would prefer not to enlarge the shmem inode just for that. | 85 | * a time): we would prefer not to enlarge the shmem inode just for that. |
86 | */ | 86 | */ |
87 | struct shmem_falloc { | 87 | struct shmem_falloc { |
88 | wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ | ||
88 | pgoff_t start; /* start of range currently being fallocated */ | 89 | pgoff_t start; /* start of range currently being fallocated */ |
89 | pgoff_t next; /* the next page offset to be fallocated */ | 90 | pgoff_t next; /* the next page offset to be fallocated */ |
90 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ | 91 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ |
@@ -467,23 +468,20 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
467 | return; | 468 | return; |
468 | 469 | ||
469 | index = start; | 470 | index = start; |
470 | for ( ; ; ) { | 471 | while (index < end) { |
471 | cond_resched(); | 472 | cond_resched(); |
472 | 473 | ||
473 | pvec.nr = find_get_entries(mapping, index, | 474 | pvec.nr = find_get_entries(mapping, index, |
474 | min(end - index, (pgoff_t)PAGEVEC_SIZE), | 475 | min(end - index, (pgoff_t)PAGEVEC_SIZE), |
475 | pvec.pages, indices); | 476 | pvec.pages, indices); |
476 | if (!pvec.nr) { | 477 | if (!pvec.nr) { |
477 | if (index == start || unfalloc) | 478 | /* If all gone or hole-punch or unfalloc, we're done */ |
479 | if (index == start || end != -1) | ||
478 | break; | 480 | break; |
481 | /* But if truncating, restart to make sure all gone */ | ||
479 | index = start; | 482 | index = start; |
480 | continue; | 483 | continue; |
481 | } | 484 | } |
482 | if ((index == start || unfalloc) && indices[0] >= end) { | ||
483 | pagevec_remove_exceptionals(&pvec); | ||
484 | pagevec_release(&pvec); | ||
485 | break; | ||
486 | } | ||
487 | mem_cgroup_uncharge_start(); | 485 | mem_cgroup_uncharge_start(); |
488 | for (i = 0; i < pagevec_count(&pvec); i++) { | 486 | for (i = 0; i < pagevec_count(&pvec); i++) { |
489 | struct page *page = pvec.pages[i]; | 487 | struct page *page = pvec.pages[i]; |
@@ -495,8 +493,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
495 | if (radix_tree_exceptional_entry(page)) { | 493 | if (radix_tree_exceptional_entry(page)) { |
496 | if (unfalloc) | 494 | if (unfalloc) |
497 | continue; | 495 | continue; |
498 | nr_swaps_freed += !shmem_free_swap(mapping, | 496 | if (shmem_free_swap(mapping, index, page)) { |
499 | index, page); | 497 | /* Swap was replaced by page: retry */ |
498 | index--; | ||
499 | break; | ||
500 | } | ||
501 | nr_swaps_freed++; | ||
500 | continue; | 502 | continue; |
501 | } | 503 | } |
502 | 504 | ||
@@ -505,6 +507,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, | |||
505 | if (page->mapping == mapping) { | 507 | if (page->mapping == mapping) { |
506 | VM_BUG_ON_PAGE(PageWriteback(page), page); | 508 | VM_BUG_ON_PAGE(PageWriteback(page), page); |
507 | truncate_inode_page(mapping, page); | 509 | truncate_inode_page(mapping, page); |
510 | } else { | ||
511 | /* Page was replaced by swap: retry */ | ||
512 | unlock_page(page); | ||
513 | index--; | ||
514 | break; | ||
508 | } | 515 | } |
509 | } | 516 | } |
510 | unlock_page(page); | 517 | unlock_page(page); |
@@ -759,6 +766,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
759 | spin_lock(&inode->i_lock); | 766 | spin_lock(&inode->i_lock); |
760 | shmem_falloc = inode->i_private; | 767 | shmem_falloc = inode->i_private; |
761 | if (shmem_falloc && | 768 | if (shmem_falloc && |
769 | !shmem_falloc->waitq && | ||
762 | index >= shmem_falloc->start && | 770 | index >= shmem_falloc->start && |
763 | index < shmem_falloc->next) | 771 | index < shmem_falloc->next) |
764 | shmem_falloc->nr_unswapped++; | 772 | shmem_falloc->nr_unswapped++; |
@@ -1027,6 +1035,9 @@ repeat: | |||
1027 | goto failed; | 1035 | goto failed; |
1028 | } | 1036 | } |
1029 | 1037 | ||
1038 | if (page && sgp == SGP_WRITE) | ||
1039 | mark_page_accessed(page); | ||
1040 | |||
1030 | /* fallocated page? */ | 1041 | /* fallocated page? */ |
1031 | if (page && !PageUptodate(page)) { | 1042 | if (page && !PageUptodate(page)) { |
1032 | if (sgp != SGP_READ) | 1043 | if (sgp != SGP_READ) |
@@ -1108,6 +1119,9 @@ repeat: | |||
1108 | shmem_recalc_inode(inode); | 1119 | shmem_recalc_inode(inode); |
1109 | spin_unlock(&info->lock); | 1120 | spin_unlock(&info->lock); |
1110 | 1121 | ||
1122 | if (sgp == SGP_WRITE) | ||
1123 | mark_page_accessed(page); | ||
1124 | |||
1111 | delete_from_swap_cache(page); | 1125 | delete_from_swap_cache(page); |
1112 | set_page_dirty(page); | 1126 | set_page_dirty(page); |
1113 | swap_free(swap); | 1127 | swap_free(swap); |
@@ -1134,6 +1148,9 @@ repeat: | |||
1134 | 1148 | ||
1135 | __SetPageSwapBacked(page); | 1149 | __SetPageSwapBacked(page); |
1136 | __set_page_locked(page); | 1150 | __set_page_locked(page); |
1151 | if (sgp == SGP_WRITE) | ||
1152 | init_page_accessed(page); | ||
1153 | |||
1137 | error = mem_cgroup_charge_file(page, current->mm, | 1154 | error = mem_cgroup_charge_file(page, current->mm, |
1138 | gfp & GFP_RECLAIM_MASK); | 1155 | gfp & GFP_RECLAIM_MASK); |
1139 | if (error) | 1156 | if (error) |
@@ -1233,6 +1250,64 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1233 | int error; | 1250 | int error; |
1234 | int ret = VM_FAULT_LOCKED; | 1251 | int ret = VM_FAULT_LOCKED; |
1235 | 1252 | ||
1253 | /* | ||
1254 | * Trinity finds that probing a hole which tmpfs is punching can | ||
1255 | * prevent the hole-punch from ever completing: which in turn | ||
1256 | * locks writers out with its hold on i_mutex. So refrain from | ||
1257 | * faulting pages into the hole while it's being punched. Although | ||
1258 | * shmem_undo_range() does remove the additions, it may be unable to | ||
1259 | * keep up, as each new page needs its own unmap_mapping_range() call, | ||
1260 | * and the i_mmap tree grows ever slower to scan if new vmas are added. | ||
1261 | * | ||
1262 | * It does not matter if we sometimes reach this check just before the | ||
1263 | * hole-punch begins, so that one fault then races with the punch: | ||
1264 | * we just need to make racing faults a rare case. | ||
1265 | * | ||
1266 | * The implementation below would be much simpler if we just used a | ||
1267 | * standard mutex or completion: but we cannot take i_mutex in fault, | ||
1268 | * and bloating every shmem inode for this unlikely case would be sad. | ||
1269 | */ | ||
1270 | if (unlikely(inode->i_private)) { | ||
1271 | struct shmem_falloc *shmem_falloc; | ||
1272 | |||
1273 | spin_lock(&inode->i_lock); | ||
1274 | shmem_falloc = inode->i_private; | ||
1275 | if (shmem_falloc && | ||
1276 | shmem_falloc->waitq && | ||
1277 | vmf->pgoff >= shmem_falloc->start && | ||
1278 | vmf->pgoff < shmem_falloc->next) { | ||
1279 | wait_queue_head_t *shmem_falloc_waitq; | ||
1280 | DEFINE_WAIT(shmem_fault_wait); | ||
1281 | |||
1282 | ret = VM_FAULT_NOPAGE; | ||
1283 | if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && | ||
1284 | !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { | ||
1285 | /* It's polite to up mmap_sem if we can */ | ||
1286 | up_read(&vma->vm_mm->mmap_sem); | ||
1287 | ret = VM_FAULT_RETRY; | ||
1288 | } | ||
1289 | |||
1290 | shmem_falloc_waitq = shmem_falloc->waitq; | ||
1291 | prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, | ||
1292 | TASK_UNINTERRUPTIBLE); | ||
1293 | spin_unlock(&inode->i_lock); | ||
1294 | schedule(); | ||
1295 | |||
1296 | /* | ||
1297 | * shmem_falloc_waitq points into the shmem_fallocate() | ||
1298 | * stack of the hole-punching task: shmem_falloc_waitq | ||
1299 | * is usually invalid by the time we reach here, but | ||
1300 | * finish_wait() does not dereference it in that case; | ||
1301 | * though i_lock needed lest racing with wake_up_all(). | ||
1302 | */ | ||
1303 | spin_lock(&inode->i_lock); | ||
1304 | finish_wait(shmem_falloc_waitq, &shmem_fault_wait); | ||
1305 | spin_unlock(&inode->i_lock); | ||
1306 | return ret; | ||
1307 | } | ||
1308 | spin_unlock(&inode->i_lock); | ||
1309 | } | ||
1310 | |||
1236 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); | 1311 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); |
1237 | if (error) | 1312 | if (error) |
1238 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); | 1313 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); |
@@ -1372,13 +1447,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping, | |||
1372 | loff_t pos, unsigned len, unsigned flags, | 1447 | loff_t pos, unsigned len, unsigned flags, |
1373 | struct page **pagep, void **fsdata) | 1448 | struct page **pagep, void **fsdata) |
1374 | { | 1449 | { |
1375 | int ret; | ||
1376 | struct inode *inode = mapping->host; | 1450 | struct inode *inode = mapping->host; |
1377 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 1451 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
1378 | ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); | 1452 | return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); |
1379 | if (ret == 0 && *pagep) | ||
1380 | init_page_accessed(*pagep); | ||
1381 | return ret; | ||
1382 | } | 1453 | } |
1383 | 1454 | ||
1384 | static int | 1455 | static int |
@@ -1724,18 +1795,34 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, | |||
1724 | pgoff_t start, index, end; | 1795 | pgoff_t start, index, end; |
1725 | int error; | 1796 | int error; |
1726 | 1797 | ||
1798 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | ||
1799 | return -EOPNOTSUPP; | ||
1800 | |||
1727 | mutex_lock(&inode->i_mutex); | 1801 | mutex_lock(&inode->i_mutex); |
1728 | 1802 | ||
1729 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 1803 | if (mode & FALLOC_FL_PUNCH_HOLE) { |
1730 | struct address_space *mapping = file->f_mapping; | 1804 | struct address_space *mapping = file->f_mapping; |
1731 | loff_t unmap_start = round_up(offset, PAGE_SIZE); | 1805 | loff_t unmap_start = round_up(offset, PAGE_SIZE); |
1732 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; | 1806 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; |
1807 | DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); | ||
1808 | |||
1809 | shmem_falloc.waitq = &shmem_falloc_waitq; | ||
1810 | shmem_falloc.start = unmap_start >> PAGE_SHIFT; | ||
1811 | shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; | ||
1812 | spin_lock(&inode->i_lock); | ||
1813 | inode->i_private = &shmem_falloc; | ||
1814 | spin_unlock(&inode->i_lock); | ||
1733 | 1815 | ||
1734 | if ((u64)unmap_end > (u64)unmap_start) | 1816 | if ((u64)unmap_end > (u64)unmap_start) |
1735 | unmap_mapping_range(mapping, unmap_start, | 1817 | unmap_mapping_range(mapping, unmap_start, |
1736 | 1 + unmap_end - unmap_start, 0); | 1818 | 1 + unmap_end - unmap_start, 0); |
1737 | shmem_truncate_range(inode, offset, offset + len - 1); | 1819 | shmem_truncate_range(inode, offset, offset + len - 1); |
1738 | /* No need to unmap again: hole-punching leaves COWed pages */ | 1820 | /* No need to unmap again: hole-punching leaves COWed pages */ |
1821 | |||
1822 | spin_lock(&inode->i_lock); | ||
1823 | inode->i_private = NULL; | ||
1824 | wake_up_all(&shmem_falloc_waitq); | ||
1825 | spin_unlock(&inode->i_lock); | ||
1739 | error = 0; | 1826 | error = 0; |
1740 | goto out; | 1827 | goto out; |
1741 | } | 1828 | } |
@@ -1753,6 +1840,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, | |||
1753 | goto out; | 1840 | goto out; |
1754 | } | 1841 | } |
1755 | 1842 | ||
1843 | shmem_falloc.waitq = NULL; | ||
1756 | shmem_falloc.start = start; | 1844 | shmem_falloc.start = start; |
1757 | shmem_falloc.next = start; | 1845 | shmem_falloc.next = start; |
1758 | shmem_falloc.nr_falloced = 0; | 1846 | shmem_falloc.nr_falloced = 0; |
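A pthread analogy (not the kernel waitqueue API) for the fault-vs-hole-punch handshake added to mm/shmem.c above: faults that land inside the range being punched go to sleep and are woken once the punching task clears i_private. The mutex, condition variable and flag below are stand-ins for i_lock, shmem_falloc_waitq and inode->i_private.

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t lock  = PTHREAD_MUTEX_INITIALIZER;  /* ~ i_lock */
	static pthread_cond_t  waitq = PTHREAD_COND_INITIALIZER;   /* ~ shmem_falloc_waitq */
	static int punch_in_progress;                              /* ~ inode->i_private */

	static void *fault_thread(void *arg)
	{
		pthread_mutex_lock(&lock);
		while (punch_in_progress)           /* refrain from refilling the hole */
			pthread_cond_wait(&waitq, &lock);
		pthread_mutex_unlock(&lock);
		puts("fault proceeds after punch completed");
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_mutex_lock(&lock);
		punch_in_progress = 1;              /* fallocate(PUNCH_HOLE) begins */
		pthread_mutex_unlock(&lock);

		pthread_create(&t, NULL, fault_thread, NULL);

		/* ... unmap_mapping_range() + shmem_truncate_range() would run here ... */

		pthread_mutex_lock(&lock);
		punch_in_progress = 0;
		pthread_cond_broadcast(&waitq);     /* ~ wake_up_all() */
		pthread_mutex_unlock(&lock);

		pthread_join(t, NULL);
		return 0;
	}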
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -386,6 +386,39 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
386 | 386 | ||
387 | #endif | 387 | #endif |
388 | 388 | ||
389 | #define OBJECT_FREE (0) | ||
390 | #define OBJECT_ACTIVE (1) | ||
391 | |||
392 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
393 | |||
394 | static void set_obj_status(struct page *page, int idx, int val) | ||
395 | { | ||
396 | int freelist_size; | ||
397 | char *status; | ||
398 | struct kmem_cache *cachep = page->slab_cache; | ||
399 | |||
400 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
401 | status = (char *)page->freelist + freelist_size; | ||
402 | status[idx] = val; | ||
403 | } | ||
404 | |||
405 | static inline unsigned int get_obj_status(struct page *page, int idx) | ||
406 | { | ||
407 | int freelist_size; | ||
408 | char *status; | ||
409 | struct kmem_cache *cachep = page->slab_cache; | ||
410 | |||
411 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
412 | status = (char *)page->freelist + freelist_size; | ||
413 | |||
414 | return status[idx]; | ||
415 | } | ||
416 | |||
417 | #else | ||
418 | static inline void set_obj_status(struct page *page, int idx, int val) {} | ||
419 | |||
420 | #endif | ||
421 | |||
389 | /* | 422 | /* |
390 | * Do not go above this order unless 0 objects fit into the slab or | 423 | * Do not go above this order unless 0 objects fit into the slab or |
391 | * overridden on the command line. | 424 | * overridden on the command line. |
@@ -576,12 +609,30 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
576 | return cachep->array[smp_processor_id()]; | 609 | return cachep->array[smp_processor_id()]; |
577 | } | 610 | } |
578 | 611 | ||
612 | static size_t calculate_freelist_size(int nr_objs, size_t align) | ||
613 | { | ||
614 | size_t freelist_size; | ||
615 | |||
616 | freelist_size = nr_objs * sizeof(freelist_idx_t); | ||
617 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
618 | freelist_size += nr_objs * sizeof(char); | ||
619 | |||
620 | if (align) | ||
621 | freelist_size = ALIGN(freelist_size, align); | ||
622 | |||
623 | return freelist_size; | ||
624 | } | ||
625 | |||
579 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, | 626 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, |
580 | size_t idx_size, size_t align) | 627 | size_t idx_size, size_t align) |
581 | { | 628 | { |
582 | int nr_objs; | 629 | int nr_objs; |
630 | size_t remained_size; | ||
583 | size_t freelist_size; | 631 | size_t freelist_size; |
632 | int extra_space = 0; | ||
584 | 633 | ||
634 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
635 | extra_space = sizeof(char); | ||
585 | /* | 636 | /* |
586 | * Ignore padding for the initial guess. The padding | 637 | * Ignore padding for the initial guess. The padding |
587 | * is at most @align-1 bytes, and @buffer_size is at | 638 | * is at most @align-1 bytes, and @buffer_size is at |
@@ -590,14 +641,15 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size, | |||
590 | * into the memory allocation when taking the padding | 641 | * into the memory allocation when taking the padding |
591 | * into account. | 642 | * into account. |
592 | */ | 643 | */ |
593 | nr_objs = slab_size / (buffer_size + idx_size); | 644 | nr_objs = slab_size / (buffer_size + idx_size + extra_space); |
594 | 645 | ||
595 | /* | 646 | /* |
596 | * This calculated number will be either the right | 647 | * This calculated number will be either the right |
597 | * amount, or one greater than what we want. | 648 | * amount, or one greater than what we want. |
598 | */ | 649 | */ |
599 | freelist_size = slab_size - nr_objs * buffer_size; | 650 | remained_size = slab_size - nr_objs * buffer_size; |
600 | if (freelist_size < ALIGN(nr_objs * idx_size, align)) | 651 | freelist_size = calculate_freelist_size(nr_objs, align); |
652 | if (remained_size < freelist_size) | ||
601 | nr_objs--; | 653 | nr_objs--; |
602 | 654 | ||
603 | return nr_objs; | 655 | return nr_objs; |
@@ -635,7 +687,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
635 | } else { | 687 | } else { |
636 | nr_objs = calculate_nr_objs(slab_size, buffer_size, | 688 | nr_objs = calculate_nr_objs(slab_size, buffer_size, |
637 | sizeof(freelist_idx_t), align); | 689 | sizeof(freelist_idx_t), align); |
638 | mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align); | 690 | mgmt_size = calculate_freelist_size(nr_objs, align); |
639 | } | 691 | } |
640 | *num = nr_objs; | 692 | *num = nr_objs; |
641 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; | 693 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; |
@@ -2041,13 +2093,16 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2041 | break; | 2093 | break; |
2042 | 2094 | ||
2043 | if (flags & CFLGS_OFF_SLAB) { | 2095 | if (flags & CFLGS_OFF_SLAB) { |
2096 | size_t freelist_size_per_obj = sizeof(freelist_idx_t); | ||
2044 | /* | 2097 | /* |
2045 | * Max number of objs-per-slab for caches which | 2098 | * Max number of objs-per-slab for caches which |
2046 | * use off-slab slabs. Needed to avoid a possible | 2099 | * use off-slab slabs. Needed to avoid a possible |
2047 | * looping condition in cache_grow(). | 2100 | * looping condition in cache_grow(). |
2048 | */ | 2101 | */ |
2102 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
2103 | freelist_size_per_obj += sizeof(char); | ||
2049 | offslab_limit = size; | 2104 | offslab_limit = size; |
2050 | offslab_limit /= sizeof(freelist_idx_t); | 2105 | offslab_limit /= freelist_size_per_obj; |
2051 | 2106 | ||
2052 | if (num > offslab_limit) | 2107 | if (num > offslab_limit) |
2053 | break; | 2108 | break; |
@@ -2294,8 +2349,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2294 | if (!cachep->num) | 2349 | if (!cachep->num) |
2295 | return -E2BIG; | 2350 | return -E2BIG; |
2296 | 2351 | ||
2297 | freelist_size = | 2352 | freelist_size = calculate_freelist_size(cachep->num, cachep->align); |
2298 | ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align); | ||
2299 | 2353 | ||
2300 | /* | 2354 | /* |
2301 | * If the slab has been placed off-slab, and we have enough space then | 2355 | * If the slab has been placed off-slab, and we have enough space then |
@@ -2308,7 +2362,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2308 | 2362 | ||
2309 | if (flags & CFLGS_OFF_SLAB) { | 2363 | if (flags & CFLGS_OFF_SLAB) { |
2310 | /* really off slab. No need for manual alignment */ | 2364 | /* really off slab. No need for manual alignment */ |
2311 | freelist_size = cachep->num * sizeof(freelist_idx_t); | 2365 | freelist_size = calculate_freelist_size(cachep->num, 0); |
2312 | 2366 | ||
2313 | #ifdef CONFIG_PAGE_POISONING | 2367 | #ifdef CONFIG_PAGE_POISONING |
2314 | /* If we're going to use the generic kernel_map_pages() | 2368 | /* If we're going to use the generic kernel_map_pages() |
@@ -2612,6 +2666,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2612 | if (cachep->ctor) | 2666 | if (cachep->ctor) |
2613 | cachep->ctor(objp); | 2667 | cachep->ctor(objp); |
2614 | #endif | 2668 | #endif |
2669 | set_obj_status(page, i, OBJECT_FREE); | ||
2615 | set_free_obj(page, i, i); | 2670 | set_free_obj(page, i, i); |
2616 | } | 2671 | } |
2617 | } | 2672 | } |
@@ -2820,6 +2875,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2820 | BUG_ON(objnr >= cachep->num); | 2875 | BUG_ON(objnr >= cachep->num); |
2821 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); | 2876 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); |
2822 | 2877 | ||
2878 | set_obj_status(page, objnr, OBJECT_FREE); | ||
2823 | if (cachep->flags & SLAB_POISON) { | 2879 | if (cachep->flags & SLAB_POISON) { |
2824 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2880 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2825 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | 2881 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
@@ -2953,6 +3009,8 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | |||
2953 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | 3009 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
2954 | gfp_t flags, void *objp, unsigned long caller) | 3010 | gfp_t flags, void *objp, unsigned long caller) |
2955 | { | 3011 | { |
3012 | struct page *page; | ||
3013 | |||
2956 | if (!objp) | 3014 | if (!objp) |
2957 | return objp; | 3015 | return objp; |
2958 | if (cachep->flags & SLAB_POISON) { | 3016 | if (cachep->flags & SLAB_POISON) { |
@@ -2983,6 +3041,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
2983 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 3041 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
2984 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 3042 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
2985 | } | 3043 | } |
3044 | |||
3045 | page = virt_to_head_page(objp); | ||
3046 | set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE); | ||
2986 | objp += obj_offset(cachep); | 3047 | objp += obj_offset(cachep); |
2987 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 3048 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
2988 | cachep->ctor(objp); | 3049 | cachep->ctor(objp); |
@@ -4219,21 +4280,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, | |||
4219 | struct page *page) | 4280 | struct page *page) |
4220 | { | 4281 | { |
4221 | void *p; | 4282 | void *p; |
4222 | int i, j; | 4283 | int i; |
4223 | 4284 | ||
4224 | if (n[0] == n[1]) | 4285 | if (n[0] == n[1]) |
4225 | return; | 4286 | return; |
4226 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { | 4287 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { |
4227 | bool active = true; | 4288 | if (get_obj_status(page, i) != OBJECT_ACTIVE) |
4228 | |||
4229 | for (j = page->active; j < c->num; j++) { | ||
4230 | /* Skip freed item */ | ||
4231 | if (get_free_obj(page, j) == i) { | ||
4232 | active = false; | ||
4233 | break; | ||
4234 | } | ||
4235 | } | ||
4236 | if (!active) | ||
4237 | continue; | 4289 | continue; |
4238 | 4290 | ||
4239 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4291 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) |
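A small standalone calculation mirroring the calculate_freelist_size()/set_obj_status() changes above in mm/slab.c: with CONFIG_DEBUG_SLAB_LEAK the per-slab management area now holds one extra status byte per object after the freelist index array. A one-byte freelist_idx_t is assumed here for simplicity, and the align value is an example.

	#include <stdio.h>

	static size_t freelist_size(int nr_objs, size_t idx_size,
				    int debug_slab_leak, size_t align)
	{
		size_t size = nr_objs * idx_size;

		if (debug_slab_leak)
			size += nr_objs;        /* one status char per object */
		if (align)                      /* align must be a power of two */
			size = (size + align - 1) & ~(align - 1);
		return size;
	}

	int main(void)
	{
		printf("64 objs, no debug:  %zu bytes\n", freelist_size(64, 1, 0, 8));
		printf("64 objs, with leak: %zu bytes\n", freelist_size(64, 1, 1, 8));
		return 0;
	}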
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 735e01a0db6f..d31c4bacc6a2 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -55,7 +55,7 @@ static int kmem_cache_sanity_check(const char *name, size_t size) | |||
55 | continue; | 55 | continue; |
56 | } | 56 | } |
57 | 57 | ||
58 | #if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) | 58 | #if !defined(CONFIG_SLUB) |
59 | if (!strcmp(s->name, name)) { | 59 | if (!strcmp(s->name, name)) { |
60 | pr_err("%s (%s): Cache name already exists.\n", | 60 | pr_err("%s (%s): Cache name already exists.\n", |
61 | __func__, name); | 61 | __func__, name); |
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1881,7 +1881,7 @@ redo:
1881 | 1881 | ||
1882 | new.frozen = 0; | 1882 | new.frozen = 0; |
1883 | 1883 | ||
1884 | if (!new.inuse && n->nr_partial > s->min_partial) | 1884 | if (!new.inuse && n->nr_partial >= s->min_partial) |
1885 | m = M_FREE; | 1885 | m = M_FREE; |
1886 | else if (new.freelist) { | 1886 | else if (new.freelist) { |
1887 | m = M_PARTIAL; | 1887 | m = M_PARTIAL; |
@@ -1992,7 +1992,7 @@ static void unfreeze_partials(struct kmem_cache *s, | |||
1992 | new.freelist, new.counters, | 1992 | new.freelist, new.counters, |
1993 | "unfreezing slab")); | 1993 | "unfreezing slab")); |
1994 | 1994 | ||
1995 | if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) { | 1995 | if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) { |
1996 | page->next = discard_page; | 1996 | page->next = discard_page; |
1997 | discard_page = page; | 1997 | discard_page = page; |
1998 | } else { | 1998 | } else { |
@@ -2620,7 +2620,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2620 | return; | 2620 | return; |
2621 | } | 2621 | } |
2622 | 2622 | ||
2623 | if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) | 2623 | if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) |
2624 | goto slab_empty; | 2624 | goto slab_empty; |
2625 | 2625 | ||
2626 | /* | 2626 | /* |
diff --git a/mm/truncate.c b/mm/truncate.c
index 6a78c814bebf..eda247307164 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -355,14 +355,16 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
355 | for ( ; ; ) { | 355 | for ( ; ; ) { |
356 | cond_resched(); | 356 | cond_resched(); |
357 | if (!pagevec_lookup_entries(&pvec, mapping, index, | 357 | if (!pagevec_lookup_entries(&pvec, mapping, index, |
358 | min(end - index, (pgoff_t)PAGEVEC_SIZE), | 358 | min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { |
359 | indices)) { | 359 | /* If all gone from start onwards, we're done */ |
360 | if (index == start) | 360 | if (index == start) |
361 | break; | 361 | break; |
362 | /* Otherwise restart to make sure all gone */ | ||
362 | index = start; | 363 | index = start; |
363 | continue; | 364 | continue; |
364 | } | 365 | } |
365 | if (index == start && indices[0] >= end) { | 366 | if (index == start && indices[0] >= end) { |
367 | /* All gone out of hole to be punched, we're done */ | ||
366 | pagevec_remove_exceptionals(&pvec); | 368 | pagevec_remove_exceptionals(&pvec); |
367 | pagevec_release(&pvec); | 369 | pagevec_release(&pvec); |
368 | break; | 370 | break; |
@@ -373,8 +375,11 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
373 | 375 | ||
374 | /* We rely upon deletion not changing page->index */ | 376 | /* We rely upon deletion not changing page->index */ |
375 | index = indices[i]; | 377 | index = indices[i]; |
376 | if (index >= end) | 378 | if (index >= end) { |
379 | /* Restart punch to make sure all gone */ | ||
380 | index = start - 1; | ||
377 | break; | 381 | break; |
382 | } | ||
378 | 383 | ||
379 | if (radix_tree_exceptional_entry(page)) { | 384 | if (radix_tree_exceptional_entry(page)) { |
380 | clear_exceptional_entry(mapping, index, page); | 385 | clear_exceptional_entry(mapping, index, page); |
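A toy model of the "restart to make sure all gone" rule added to truncate_inode_pages_range() above: when punching a hole, keep rescanning the range until a full pass finds nothing, rather than stopping at the first empty lookup. An array stands in for the radix tree and the page set is arbitrary.

	#include <stdbool.h>
	#include <stdio.h>

	#define NPAGES 8

	int main(void)
	{
		bool present[NPAGES] = { false, true, false, true,
					 false, false, true, false };
		int start = 1, end = 7;
		bool found;

		do {
			found = false;
			for (int i = start; i < end; i++) {
				if (present[i]) {
					present[i] = false;  /* drop the page */
					found = true;
				}
			}
			/* A racing fault could repopulate part of the range here;
			 * the loop would simply make another pass. */
		} while (found);

		puts("range is clean");
		return 0;
	}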