aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-03 12:03:50 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-03 12:03:50 -0400
commitf0395d5b4d691164a6e4d107590636db80b29bf6 (patch)
tree6a362f035c703d3d1719deb2ae6c9cd8ce8ca671
parentfb615d61b5583db92e3793709b97e35dc9499c2a (diff)
parent2628bd6fc052bd85e9864dae4de494d8a6313391 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "7 fixes" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: mm, swap: fix race between swap count continuation operations mm/huge_memory.c: deposit page table when copying a PMD migration entry initramfs: fix initramfs rebuilds w/ compression after disabling fs/hugetlbfs/inode.c: fix hwpoison reserve accounting ocfs2: fstrim: Fix start offset of first cluster group during fstrim mm, /proc/pid/pagemap: fix soft dirty marking for PMD migration entry userfaultfd: hugetlbfs: prevent UFFDIO_COPY to fill beyond the end of i_size
-rw-r--r--fs/hugetlbfs/inode.c5
-rw-r--r--fs/ocfs2/alloc.c24
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--include/linux/swap.h4
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/hugetlb.c32
-rw-r--r--mm/swapfile.c23
-rw-r--r--usr/Makefile9
8 files changed, 86 insertions, 20 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 59073e9f01a4..ed113ea17aff 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -842,9 +842,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping,
842 struct page *page) 842 struct page *page)
843{ 843{
844 struct inode *inode = mapping->host; 844 struct inode *inode = mapping->host;
845 pgoff_t index = page->index;
845 846
846 remove_huge_page(page); 847 remove_huge_page(page);
847 hugetlb_fix_reserve_counts(inode); 848 if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
849 hugetlb_fix_reserve_counts(inode);
850
848 return 0; 851 return 0;
849} 852}
850 853
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a177eae3aa1a..addd7c5f2d3e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7304,13 +7304,24 @@ out:
7304 7304
7305static int ocfs2_trim_extent(struct super_block *sb, 7305static int ocfs2_trim_extent(struct super_block *sb,
7306 struct ocfs2_group_desc *gd, 7306 struct ocfs2_group_desc *gd,
7307 u32 start, u32 count) 7307 u64 group, u32 start, u32 count)
7308{ 7308{
7309 u64 discard, bcount; 7309 u64 discard, bcount;
7310 struct ocfs2_super *osb = OCFS2_SB(sb);
7310 7311
7311 bcount = ocfs2_clusters_to_blocks(sb, count); 7312 bcount = ocfs2_clusters_to_blocks(sb, count);
7312 discard = le64_to_cpu(gd->bg_blkno) + 7313 discard = ocfs2_clusters_to_blocks(sb, start);
7313 ocfs2_clusters_to_blocks(sb, start); 7314
7315 /*
7316 * For the first cluster group, the gd->bg_blkno is not at the start
7317 * of the group, but at an offset from the start. If we add it while
7318 * calculating discard for first group, we will wrongly start fstrim a
7319 * few blocks after the desired start block and the range can cross
7320 * over into the next cluster group. So, add it only if this is not
7321 * the first cluster group.
7322 */
7323 if (group != osb->first_cluster_group_blkno)
7324 discard += le64_to_cpu(gd->bg_blkno);
7314 7325
7315 trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); 7326 trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
7316 7327
@@ -7318,7 +7329,7 @@ static int ocfs2_trim_extent(struct super_block *sb,
7318} 7329}
7319 7330
7320static int ocfs2_trim_group(struct super_block *sb, 7331static int ocfs2_trim_group(struct super_block *sb,
7321 struct ocfs2_group_desc *gd, 7332 struct ocfs2_group_desc *gd, u64 group,
7322 u32 start, u32 max, u32 minbits) 7333 u32 start, u32 max, u32 minbits)
7323{ 7334{
7324 int ret = 0, count = 0, next; 7335 int ret = 0, count = 0, next;
@@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb,
7337 next = ocfs2_find_next_bit(bitmap, max, start); 7348 next = ocfs2_find_next_bit(bitmap, max, start);
7338 7349
7339 if ((next - start) >= minbits) { 7350 if ((next - start) >= minbits) {
7340 ret = ocfs2_trim_extent(sb, gd, 7351 ret = ocfs2_trim_extent(sb, gd, group,
7341 start, next - start); 7352 start, next - start);
7342 if (ret < 0) { 7353 if (ret < 0) {
7343 mlog_errno(ret); 7354 mlog_errno(ret);
@@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7435 } 7446 }
7436 7447
7437 gd = (struct ocfs2_group_desc *)gd_bh->b_data; 7448 gd = (struct ocfs2_group_desc *)gd_bh->b_data;
7438 cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); 7449 cnt = ocfs2_trim_group(sb, gd, group,
7450 first_bit, last_bit, minlen);
7439 brelse(gd_bh); 7451 brelse(gd_bh);
7440 gd_bh = NULL; 7452 gd_bh = NULL;
7441 if (cnt < 0) { 7453 if (cnt < 0) {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 280282b05bc7..6744bd706ecf 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1311,13 +1311,15 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
1311 pmd_t pmd = *pmdp; 1311 pmd_t pmd = *pmdp;
1312 struct page *page = NULL; 1312 struct page *page = NULL;
1313 1313
1314 if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) 1314 if (vma->vm_flags & VM_SOFTDIRTY)
1315 flags |= PM_SOFT_DIRTY; 1315 flags |= PM_SOFT_DIRTY;
1316 1316
1317 if (pmd_present(pmd)) { 1317 if (pmd_present(pmd)) {
1318 page = pmd_page(pmd); 1318 page = pmd_page(pmd);
1319 1319
1320 flags |= PM_PRESENT; 1320 flags |= PM_PRESENT;
1321 if (pmd_soft_dirty(pmd))
1322 flags |= PM_SOFT_DIRTY;
1321 if (pm->show_pfn) 1323 if (pm->show_pfn)
1322 frame = pmd_pfn(pmd) + 1324 frame = pmd_pfn(pmd) +
1323 ((addr & ~PMD_MASK) >> PAGE_SHIFT); 1325 ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1329,6 +1331,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
1329 frame = swp_type(entry) | 1331 frame = swp_type(entry) |
1330 (swp_offset(entry) << MAX_SWAPFILES_SHIFT); 1332 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
1331 flags |= PM_SWAP; 1333 flags |= PM_SWAP;
1334 if (pmd_swp_soft_dirty(pmd))
1335 flags |= PM_SOFT_DIRTY;
1332 VM_BUG_ON(!is_pmd_migration_entry(pmd)); 1336 VM_BUG_ON(!is_pmd_migration_entry(pmd));
1333 page = migration_entry_to_page(entry); 1337 page = migration_entry_to_page(entry);
1334 } 1338 }
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b489bd77bbdc..f02fb5db8914 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -266,6 +266,10 @@ struct swap_info_struct {
266 * both locks need hold, hold swap_lock 266 * both locks need hold, hold swap_lock
267 * first. 267 * first.
268 */ 268 */
269 spinlock_t cont_lock; /*
270 * protect swap count continuation page
271 * list.
272 */
269 struct work_struct discard_work; /* discard worker */ 273 struct work_struct discard_work; /* discard worker */
270 struct swap_cluster_list discard_clusters; /* discard clusters list */ 274 struct swap_cluster_list discard_clusters; /* discard clusters list */
271}; 275};
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 269b5df58543..1981ed697dab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
941 pmd = pmd_swp_mksoft_dirty(pmd); 941 pmd = pmd_swp_mksoft_dirty(pmd);
942 set_pmd_at(src_mm, addr, src_pmd, pmd); 942 set_pmd_at(src_mm, addr, src_pmd, pmd);
943 } 943 }
944 add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
945 atomic_long_inc(&dst_mm->nr_ptes);
946 pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
944 set_pmd_at(dst_mm, addr, dst_pmd, pmd); 947 set_pmd_at(dst_mm, addr, dst_pmd, pmd);
945 ret = 0; 948 ret = 0;
946 goto out_unlock; 949 goto out_unlock;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 424b0ef08a60..2d2ff5e8bf2b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
3984 unsigned long src_addr, 3984 unsigned long src_addr,
3985 struct page **pagep) 3985 struct page **pagep)
3986{ 3986{
3987 struct address_space *mapping;
3988 pgoff_t idx;
3989 unsigned long size;
3987 int vm_shared = dst_vma->vm_flags & VM_SHARED; 3990 int vm_shared = dst_vma->vm_flags & VM_SHARED;
3988 struct hstate *h = hstate_vma(dst_vma); 3991 struct hstate *h = hstate_vma(dst_vma);
3989 pte_t _dst_pte; 3992 pte_t _dst_pte;
@@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
4021 __SetPageUptodate(page); 4024 __SetPageUptodate(page);
4022 set_page_huge_active(page); 4025 set_page_huge_active(page);
4023 4026
4027 mapping = dst_vma->vm_file->f_mapping;
4028 idx = vma_hugecache_offset(h, dst_vma, dst_addr);
4029
4024 /* 4030 /*
4025 * If shared, add to page cache 4031 * If shared, add to page cache
4026 */ 4032 */
4027 if (vm_shared) { 4033 if (vm_shared) {
4028 struct address_space *mapping = dst_vma->vm_file->f_mapping; 4034 size = i_size_read(mapping->host) >> huge_page_shift(h);
4029 pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr); 4035 ret = -EFAULT;
4036 if (idx >= size)
4037 goto out_release_nounlock;
4030 4038
4039 /*
4040 * Serialization between remove_inode_hugepages() and
4041 * huge_add_to_page_cache() below happens through the
4042 * hugetlb_fault_mutex_table that here must be held by
4043 * the caller.
4044 */
4031 ret = huge_add_to_page_cache(page, mapping, idx); 4045 ret = huge_add_to_page_cache(page, mapping, idx);
4032 if (ret) 4046 if (ret)
4033 goto out_release_nounlock; 4047 goto out_release_nounlock;
@@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
4036 ptl = huge_pte_lockptr(h, dst_mm, dst_pte); 4050 ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
4037 spin_lock(ptl); 4051 spin_lock(ptl);
4038 4052
4053 /*
4054 * Recheck the i_size after holding PT lock to make sure not
4055 * to leave any page mapped (as page_mapped()) beyond the end
4056 * of the i_size (remove_inode_hugepages() is strict about
4057 * enforcing that). If we bail out here, we'll also leave a
4058 * page in the radix tree in the vm_shared case beyond the end
4059 * of the i_size, but remove_inode_hugepages() will take care
4060 * of it as soon as we drop the hugetlb_fault_mutex_table.
4061 */
4062 size = i_size_read(mapping->host) >> huge_page_shift(h);
4063 ret = -EFAULT;
4064 if (idx >= size)
4065 goto out_release_unlock;
4066
4039 ret = -EEXIST; 4067 ret = -EEXIST;
4040 if (!huge_pte_none(huge_ptep_get(dst_pte))) 4068 if (!huge_pte_none(huge_ptep_get(dst_pte)))
4041 goto out_release_unlock; 4069 goto out_release_unlock;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bf91dc9e7a79..e47a21e64764 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void)
2869 p->flags = SWP_USED; 2869 p->flags = SWP_USED;
2870 spin_unlock(&swap_lock); 2870 spin_unlock(&swap_lock);
2871 spin_lock_init(&p->lock); 2871 spin_lock_init(&p->lock);
2872 spin_lock_init(&p->cont_lock);
2872 2873
2873 return p; 2874 return p;
2874} 2875}
@@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3545 head = vmalloc_to_page(si->swap_map + offset); 3546 head = vmalloc_to_page(si->swap_map + offset);
3546 offset &= ~PAGE_MASK; 3547 offset &= ~PAGE_MASK;
3547 3548
3549 spin_lock(&si->cont_lock);
3548 /* 3550 /*
3549 * Page allocation does not initialize the page's lru field, 3551 * Page allocation does not initialize the page's lru field,
3550 * but it does always reset its private field. 3552 * but it does always reset its private field.
@@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3564 * a continuation page, free our allocation and use this one. 3566 * a continuation page, free our allocation and use this one.
3565 */ 3567 */
3566 if (!(count & COUNT_CONTINUED)) 3568 if (!(count & COUNT_CONTINUED))
3567 goto out; 3569 goto out_unlock_cont;
3568 3570
3569 map = kmap_atomic(list_page) + offset; 3571 map = kmap_atomic(list_page) + offset;
3570 count = *map; 3572 count = *map;
@@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3575 * free our allocation and use this one. 3577 * free our allocation and use this one.
3576 */ 3578 */
3577 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) 3579 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
3578 goto out; 3580 goto out_unlock_cont;
3579 } 3581 }
3580 3582
3581 list_add_tail(&page->lru, &head->lru); 3583 list_add_tail(&page->lru, &head->lru);
3582 page = NULL; /* now it's attached, don't free it */ 3584 page = NULL; /* now it's attached, don't free it */
3585out_unlock_cont:
3586 spin_unlock(&si->cont_lock);
3583out: 3587out:
3584 unlock_cluster(ci); 3588 unlock_cluster(ci);
3585 spin_unlock(&si->lock); 3589 spin_unlock(&si->lock);
@@ -3604,6 +3608,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
3604 struct page *head; 3608 struct page *head;
3605 struct page *page; 3609 struct page *page;
3606 unsigned char *map; 3610 unsigned char *map;
3611 bool ret;
3607 3612
3608 head = vmalloc_to_page(si->swap_map + offset); 3613 head = vmalloc_to_page(si->swap_map + offset);
3609 if (page_private(head) != SWP_CONTINUED) { 3614 if (page_private(head) != SWP_CONTINUED) {
@@ -3611,6 +3616,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
3611 return false; /* need to add count continuation */ 3616 return false; /* need to add count continuation */
3612 } 3617 }
3613 3618
3619 spin_lock(&si->cont_lock);
3614 offset &= ~PAGE_MASK; 3620 offset &= ~PAGE_MASK;
3615 page = list_entry(head->lru.next, struct page, lru); 3621 page = list_entry(head->lru.next, struct page, lru);
3616 map = kmap_atomic(page) + offset; 3622 map = kmap_atomic(page) + offset;
@@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si,
3631 if (*map == SWAP_CONT_MAX) { 3637 if (*map == SWAP_CONT_MAX) {
3632 kunmap_atomic(map); 3638 kunmap_atomic(map);
3633 page = list_entry(page->lru.next, struct page, lru); 3639 page = list_entry(page->lru.next, struct page, lru);
3634 if (page == head) 3640 if (page == head) {
3635 return false; /* add count continuation */ 3641 ret = false; /* add count continuation */
3642 goto out;
3643 }
3636 map = kmap_atomic(page) + offset; 3644 map = kmap_atomic(page) + offset;
3637init_map: *map = 0; /* we didn't zero the page */ 3645init_map: *map = 0; /* we didn't zero the page */
3638 } 3646 }
@@ -3645,7 +3653,7 @@ init_map: *map = 0; /* we didn't zero the page */
3645 kunmap_atomic(map); 3653 kunmap_atomic(map);
3646 page = list_entry(page->lru.prev, struct page, lru); 3654 page = list_entry(page->lru.prev, struct page, lru);
3647 } 3655 }
3648 return true; /* incremented */ 3656 ret = true; /* incremented */
3649 3657
3650 } else { /* decrementing */ 3658 } else { /* decrementing */
3651 /* 3659 /*
@@ -3671,8 +3679,11 @@ init_map: *map = 0; /* we didn't zero the page */
3671 kunmap_atomic(map); 3679 kunmap_atomic(map);
3672 page = list_entry(page->lru.prev, struct page, lru); 3680 page = list_entry(page->lru.prev, struct page, lru);
3673 } 3681 }
3674 return count == COUNT_CONTINUED; 3682 ret = count == COUNT_CONTINUED;
3675 } 3683 }
3684out:
3685 spin_unlock(&si->cont_lock);
3686 return ret;
3676} 3687}
3677 3688
3678/* 3689/*
diff --git a/usr/Makefile b/usr/Makefile
index 34a9fcd0f537..237a028693ce 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -8,6 +8,7 @@ PHONY += klibcdirs
8 8
9suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION)) 9suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION))
10datafile_y = initramfs_data.cpio$(suffix_y) 10datafile_y = initramfs_data.cpio$(suffix_y)
11datafile_d_y = .$(datafile_y).d
11AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)" 12AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)"
12 13
13 14
@@ -30,12 +31,12 @@ ramfs-args := \
30 $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \ 31 $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \
31 $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID)) 32 $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID))
32 33
33# .initramfs_data.cpio.d is used to identify all files included 34# $(datafile_d_y) is used to identify all files included
34# in initramfs and to detect if any files are added/removed. 35# in initramfs and to detect if any files are added/removed.
35# Removed files are identified by directory timestamp being updated 36# Removed files are identified by directory timestamp being updated
36# The dependency list is generated by gen_initramfs.sh -l 37# The dependency list is generated by gen_initramfs.sh -l
37ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),) 38ifneq ($(wildcard $(obj)/$(datafile_d_y)),)
38 include $(obj)/.initramfs_data.cpio.d 39 include $(obj)/$(datafile_d_y)
39endif 40endif
40 41
41quiet_cmd_initfs = GEN $@ 42quiet_cmd_initfs = GEN $@
@@ -53,5 +54,5 @@ $(deps_initramfs): klibcdirs
53# 3) If gen_init_cpio are newer than initramfs_data.cpio 54# 3) If gen_init_cpio are newer than initramfs_data.cpio
54# 4) arguments to gen_initramfs.sh changes 55# 4) arguments to gen_initramfs.sh changes
55$(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs 56$(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs
56 $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d 57 $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/$(datafile_d_y)
57 $(call if_changed,initfs) 58 $(call if_changed,initfs)