diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-25 19:21:17 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-25 19:21:17 -0400 |
commit | 9c8e30d12d46461c283608d3123debc4e75d4f8b (patch) | |
tree | c354074a091c9bacc700a2dc861085da8a7e0385 | |
parent | b8517e98305e3c76fa293133826afe39a690edcd (diff) | |
parent | b7b04004ecd9e58cdc6c6ff92f251d5ac5c0adb2 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"15 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm: numa: mark huge PTEs young when clearing NUMA hinting faults
mm: numa: slow PTE scan rate if migration failures occur
mm: numa: preserve PTE write permissions across a NUMA hinting fault
mm: numa: group related processes based on VMA flags instead of page table flags
hfsplus: fix B-tree corruption after insertion at position 0
MAINTAINERS: add Jan as DMI/SMBIOS support maintainer
fs/affs/file.c: unlock/release page on error
mm/page_alloc.c: call kernel_map_pages in unset_migratetype_isolate
mm/slub: fix lockups on PREEMPT && !SMP kernels
mm/memory hotplug: postpone the reset of obsolete pgdat
MAINTAINERS: correct rtc armada38x pattern entry
mm/pagewalk.c: prevent positive return value of walk_page_test() from being passed to callers
mm: fix anon_vma->degree underflow in anon_vma endless growing prevention
drivers/rtc/rtc-mrst: fix suspend/resume
aoe: update aoe maintainer information
-rw-r--r-- | MAINTAINERS | 13 | ||||
-rw-r--r-- | drivers/rtc/rtc-mrst.c | 17 | ||||
-rw-r--r-- | fs/affs/file.c | 19 | ||||
-rw-r--r-- | fs/hfsplus/brec.c | 20 | ||||
-rw-r--r-- | include/linux/sched.h | 9 | ||||
-rw-r--r-- | kernel/sched/fair.c | 8 | ||||
-rw-r--r-- | mm/huge_memory.c | 26 | ||||
-rw-r--r-- | mm/memory.c | 22 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 13 | ||||
-rw-r--r-- | mm/mmap.c | 4 | ||||
-rw-r--r-- | mm/mprotect.c | 3 | ||||
-rw-r--r-- | mm/page_isolation.c | 1 | ||||
-rw-r--r-- | mm/pagewalk.c | 9 | ||||
-rw-r--r-- | mm/rmap.c | 7 | ||||
-rw-r--r-- | mm/slub.c | 6 |
15 files changed, 106 insertions, 71 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 358eb0105e00..88c09ca2584f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -1186,7 +1186,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> | |||
1186 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 1186 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
1187 | S: Maintained | 1187 | S: Maintained |
1188 | F: arch/arm/mach-mvebu/ | 1188 | F: arch/arm/mach-mvebu/ |
1189 | F: drivers/rtc/armada38x-rtc | 1189 | F: drivers/rtc/rtc-armada38x.c |
1190 | 1190 | ||
1191 | ARM/Marvell Berlin SoC support | 1191 | ARM/Marvell Berlin SoC support |
1192 | M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> | 1192 | M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> |
@@ -1675,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c | |||
1675 | F: include/linux/platform_data/at24.h | 1675 | F: include/linux/platform_data/at24.h |
1676 | 1676 | ||
1677 | ATA OVER ETHERNET (AOE) DRIVER | 1677 | ATA OVER ETHERNET (AOE) DRIVER |
1678 | M: "Ed L. Cashin" <ecashin@coraid.com> | 1678 | M: "Ed L. Cashin" <ed.cashin@acm.org> |
1679 | W: http://support.coraid.com/support/linux | 1679 | W: http://www.openaoe.org/ |
1680 | S: Supported | 1680 | S: Supported |
1681 | F: Documentation/aoe/ | 1681 | F: Documentation/aoe/ |
1682 | F: drivers/block/aoe/ | 1682 | F: drivers/block/aoe/ |
@@ -3252,6 +3252,13 @@ S: Maintained | |||
3252 | F: Documentation/hwmon/dme1737 | 3252 | F: Documentation/hwmon/dme1737 |
3253 | F: drivers/hwmon/dme1737.c | 3253 | F: drivers/hwmon/dme1737.c |
3254 | 3254 | ||
3255 | DMI/SMBIOS SUPPORT | ||
3256 | M: Jean Delvare <jdelvare@suse.de> | ||
3257 | S: Maintained | ||
3258 | F: drivers/firmware/dmi-id.c | ||
3259 | F: drivers/firmware/dmi_scan.c | ||
3260 | F: include/linux/dmi.h | ||
3261 | |||
3255 | DOCKING STATION DRIVER | 3262 | DOCKING STATION DRIVER |
3256 | M: Shaohua Li <shaohua.li@intel.com> | 3263 | M: Shaohua Li <shaohua.li@intel.com> |
3257 | L: linux-acpi@vger.kernel.org | 3264 | L: linux-acpi@vger.kernel.org |
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index e2436d140175..3a6fd3a8a2ec 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c | |||
@@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev) | |||
413 | mrst->dev = NULL; | 413 | mrst->dev = NULL; |
414 | } | 414 | } |
415 | 415 | ||
416 | #ifdef CONFIG_PM | 416 | #ifdef CONFIG_PM_SLEEP |
417 | static int mrst_suspend(struct device *dev, pm_message_t mesg) | 417 | static int mrst_suspend(struct device *dev) |
418 | { | 418 | { |
419 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | 419 | struct mrst_rtc *mrst = dev_get_drvdata(dev); |
420 | unsigned char tmp; | 420 | unsigned char tmp; |
@@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg) | |||
453 | */ | 453 | */ |
454 | static inline int mrst_poweroff(struct device *dev) | 454 | static inline int mrst_poweroff(struct device *dev) |
455 | { | 455 | { |
456 | return mrst_suspend(dev, PMSG_HIBERNATE); | 456 | return mrst_suspend(dev); |
457 | } | 457 | } |
458 | 458 | ||
459 | static int mrst_resume(struct device *dev) | 459 | static int mrst_resume(struct device *dev) |
@@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev) | |||
490 | return 0; | 490 | return 0; |
491 | } | 491 | } |
492 | 492 | ||
493 | static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume); | ||
494 | #define MRST_PM_OPS (&mrst_pm_ops) | ||
495 | |||
493 | #else | 496 | #else |
494 | #define mrst_suspend NULL | 497 | #define MRST_PM_OPS NULL |
495 | #define mrst_resume NULL | ||
496 | 498 | ||
497 | static inline int mrst_poweroff(struct device *dev) | 499 | static inline int mrst_poweroff(struct device *dev) |
498 | { | 500 | { |
@@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = { | |||
529 | .remove = vrtc_mrst_platform_remove, | 531 | .remove = vrtc_mrst_platform_remove, |
530 | .shutdown = vrtc_mrst_platform_shutdown, | 532 | .shutdown = vrtc_mrst_platform_shutdown, |
531 | .driver = { | 533 | .driver = { |
532 | .name = (char *) driver_name, | 534 | .name = driver_name, |
533 | .suspend = mrst_suspend, | 535 | .pm = MRST_PM_OPS, |
534 | .resume = mrst_resume, | ||
535 | } | 536 | } |
536 | }; | 537 | }; |
537 | 538 | ||
diff --git a/fs/affs/file.c b/fs/affs/file.c index d2468bf95669..a91795e01a7f 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
699 | boff = tmp % bsize; | 699 | boff = tmp % bsize; |
700 | if (boff) { | 700 | if (boff) { |
701 | bh = affs_bread_ino(inode, bidx, 0); | 701 | bh = affs_bread_ino(inode, bidx, 0); |
702 | if (IS_ERR(bh)) | 702 | if (IS_ERR(bh)) { |
703 | return PTR_ERR(bh); | 703 | written = PTR_ERR(bh); |
704 | goto err_first_bh; | ||
705 | } | ||
704 | tmp = min(bsize - boff, to - from); | 706 | tmp = min(bsize - boff, to - from); |
705 | BUG_ON(boff + tmp > bsize || tmp > bsize); | 707 | BUG_ON(boff + tmp > bsize || tmp > bsize); |
706 | memcpy(AFFS_DATA(bh) + boff, data + from, tmp); | 708 | memcpy(AFFS_DATA(bh) + boff, data + from, tmp); |
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
712 | bidx++; | 714 | bidx++; |
713 | } else if (bidx) { | 715 | } else if (bidx) { |
714 | bh = affs_bread_ino(inode, bidx - 1, 0); | 716 | bh = affs_bread_ino(inode, bidx - 1, 0); |
715 | if (IS_ERR(bh)) | 717 | if (IS_ERR(bh)) { |
716 | return PTR_ERR(bh); | 718 | written = PTR_ERR(bh); |
719 | goto err_first_bh; | ||
720 | } | ||
717 | } | 721 | } |
718 | while (from + bsize <= to) { | 722 | while (from + bsize <= to) { |
719 | prev_bh = bh; | 723 | prev_bh = bh; |
720 | bh = affs_getemptyblk_ino(inode, bidx); | 724 | bh = affs_getemptyblk_ino(inode, bidx); |
721 | if (IS_ERR(bh)) | 725 | if (IS_ERR(bh)) |
722 | goto out; | 726 | goto err_bh; |
723 | memcpy(AFFS_DATA(bh), data + from, bsize); | 727 | memcpy(AFFS_DATA(bh), data + from, bsize); |
724 | if (buffer_new(bh)) { | 728 | if (buffer_new(bh)) { |
725 | AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); | 729 | AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); |
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
751 | prev_bh = bh; | 755 | prev_bh = bh; |
752 | bh = affs_bread_ino(inode, bidx, 1); | 756 | bh = affs_bread_ino(inode, bidx, 1); |
753 | if (IS_ERR(bh)) | 757 | if (IS_ERR(bh)) |
754 | goto out; | 758 | goto err_bh; |
755 | tmp = min(bsize, to - from); | 759 | tmp = min(bsize, to - from); |
756 | BUG_ON(tmp > bsize); | 760 | BUG_ON(tmp > bsize); |
757 | memcpy(AFFS_DATA(bh), data + from, tmp); | 761 | memcpy(AFFS_DATA(bh), data + from, tmp); |
@@ -790,12 +794,13 @@ done: | |||
790 | if (tmp > inode->i_size) | 794 | if (tmp > inode->i_size) |
791 | inode->i_size = AFFS_I(inode)->mmu_private = tmp; | 795 | inode->i_size = AFFS_I(inode)->mmu_private = tmp; |
792 | 796 | ||
797 | err_first_bh: | ||
793 | unlock_page(page); | 798 | unlock_page(page); |
794 | page_cache_release(page); | 799 | page_cache_release(page); |
795 | 800 | ||
796 | return written; | 801 | return written; |
797 | 802 | ||
798 | out: | 803 | err_bh: |
799 | bh = prev_bh; | 804 | bh = prev_bh; |
800 | if (!written) | 805 | if (!written) |
801 | written = PTR_ERR(bh); | 806 | written = PTR_ERR(bh); |
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
@@ -131,13 +131,16 @@ skip: | |||
131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); | 131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); |
132 | hfs_bnode_dump(node); | 132 | hfs_bnode_dump(node); |
133 | 133 | ||
134 | if (new_node) { | 134 | /* |
135 | /* update parent key if we inserted a key | 135 | * update parent key if we inserted a key |
136 | * at the start of the first node | 136 | * at the start of the node and it is not the new node |
137 | */ | 137 | */ |
138 | if (!rec && new_node != node) | 138 | if (!rec && new_node != node) { |
139 | hfs_brec_update_parent(fd); | 139 | hfs_bnode_read_key(node, fd->search_key, data_off + size); |
140 | hfs_brec_update_parent(fd); | ||
141 | } | ||
140 | 142 | ||
143 | if (new_node) { | ||
141 | hfs_bnode_put(fd->bnode); | 144 | hfs_bnode_put(fd->bnode); |
142 | if (!new_node->parent) { | 145 | if (!new_node->parent) { |
143 | hfs_btree_inc_height(tree); | 146 | hfs_btree_inc_height(tree); |
@@ -168,9 +171,6 @@ skip: | |||
168 | goto again; | 171 | goto again; |
169 | } | 172 | } |
170 | 173 | ||
171 | if (!rec) | ||
172 | hfs_brec_update_parent(fd); | ||
173 | |||
174 | return 0; | 174 | return 0; |
175 | } | 175 | } |
176 | 176 | ||
@@ -370,6 +370,8 @@ again: | |||
370 | if (IS_ERR(parent)) | 370 | if (IS_ERR(parent)) |
371 | return PTR_ERR(parent); | 371 | return PTR_ERR(parent); |
372 | __hfs_brec_find(parent, fd, hfs_find_rec_by_key); | 372 | __hfs_brec_find(parent, fd, hfs_find_rec_by_key); |
373 | if (fd->record < 0) | ||
374 | return -ENOENT; | ||
373 | hfs_bnode_dump(parent); | 375 | hfs_bnode_dump(parent); |
374 | rec = fd->record; | 376 | rec = fd->record; |
375 | 377 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..a419b65770d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1625,11 +1625,11 @@ struct task_struct { | |||
1625 | 1625 | ||
1626 | /* | 1626 | /* |
1627 | * numa_faults_locality tracks if faults recorded during the last | 1627 | * numa_faults_locality tracks if faults recorded during the last |
1628 | * scan window were remote/local. The task scan period is adapted | 1628 | * scan window were remote/local or failed to migrate. The task scan |
1629 | * based on the locality of the faults with different weights | 1629 | * period is adapted based on the locality of the faults with different |
1630 | * depending on whether they were shared or private faults | 1630 | * weights depending on whether they were shared or private faults |
1631 | */ | 1631 | */ |
1632 | unsigned long numa_faults_locality[2]; | 1632 | unsigned long numa_faults_locality[3]; |
1633 | 1633 | ||
1634 | unsigned long numa_pages_migrated; | 1634 | unsigned long numa_pages_migrated; |
1635 | #endif /* CONFIG_NUMA_BALANCING */ | 1635 | #endif /* CONFIG_NUMA_BALANCING */ |
@@ -1719,6 +1719,7 @@ struct task_struct { | |||
1719 | #define TNF_NO_GROUP 0x02 | 1719 | #define TNF_NO_GROUP 0x02 |
1720 | #define TNF_SHARED 0x04 | 1720 | #define TNF_SHARED 0x04 |
1721 | #define TNF_FAULT_LOCAL 0x08 | 1721 | #define TNF_FAULT_LOCAL 0x08 |
1722 | #define TNF_MIGRATE_FAIL 0x10 | ||
1722 | 1723 | ||
1723 | #ifdef CONFIG_NUMA_BALANCING | 1724 | #ifdef CONFIG_NUMA_BALANCING |
1724 | extern void task_numa_fault(int last_node, int node, int pages, int flags); | 1725 | extern void task_numa_fault(int last_node, int node, int pages, int flags); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7ce18f3c097a..bcfe32088b37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p, | |||
1609 | /* | 1609 | /* |
1610 | * If there were no record hinting faults then either the task is | 1610 | * If there were no record hinting faults then either the task is |
1611 | * completely idle or all activity is areas that are not of interest | 1611 | * completely idle or all activity is areas that are not of interest |
1612 | * to automatic numa balancing. Scan slower | 1612 | * to automatic numa balancing. Related to that, if there were failed |
1613 | * migration then it implies we are migrating too quickly or the local | ||
1614 | * node is overloaded. In either case, scan slower | ||
1613 | */ | 1615 | */ |
1614 | if (local + shared == 0) { | 1616 | if (local + shared == 0 || p->numa_faults_locality[2]) { |
1615 | p->numa_scan_period = min(p->numa_scan_period_max, | 1617 | p->numa_scan_period = min(p->numa_scan_period_max, |
1616 | p->numa_scan_period << 1); | 1618 | p->numa_scan_period << 1); |
1617 | 1619 | ||
@@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
2080 | 2082 | ||
2081 | if (migrated) | 2083 | if (migrated) |
2082 | p->numa_pages_migrated += pages; | 2084 | p->numa_pages_migrated += pages; |
2085 | if (flags & TNF_MIGRATE_FAIL) | ||
2086 | p->numa_faults_locality[2] += pages; | ||
2083 | 2087 | ||
2084 | p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; | 2088 | p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; |
2085 | p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; | 2089 | p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 626e93db28ba..6817b0350c71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1260 | int target_nid, last_cpupid = -1; | 1260 | int target_nid, last_cpupid = -1; |
1261 | bool page_locked; | 1261 | bool page_locked; |
1262 | bool migrated = false; | 1262 | bool migrated = false; |
1263 | bool was_writable; | ||
1263 | int flags = 0; | 1264 | int flags = 0; |
1264 | 1265 | ||
1265 | /* A PROT_NONE fault should not end up here */ | 1266 | /* A PROT_NONE fault should not end up here */ |
@@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1291 | flags |= TNF_FAULT_LOCAL; | 1292 | flags |= TNF_FAULT_LOCAL; |
1292 | } | 1293 | } |
1293 | 1294 | ||
1294 | /* | 1295 | /* See similar comment in do_numa_page for explanation */ |
1295 | * Avoid grouping on DSO/COW pages in specific and RO pages | 1296 | if (!(vma->vm_flags & VM_WRITE)) |
1296 | * in general, RO pages shouldn't hurt as much anyway since | ||
1297 | * they can be in shared cache state. | ||
1298 | * | ||
1299 | * FIXME! This checks "pmd_dirty()" as an approximation of | ||
1300 | * "is this a read-only page", since checking "pmd_write()" | ||
1301 | * is even more broken. We haven't actually turned this into | ||
1302 | * a writable page, so pmd_write() will always be false. | ||
1303 | */ | ||
1304 | if (!pmd_dirty(pmd)) | ||
1305 | flags |= TNF_NO_GROUP; | 1297 | flags |= TNF_NO_GROUP; |
1306 | 1298 | ||
1307 | /* | 1299 | /* |
@@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1358 | if (migrated) { | 1350 | if (migrated) { |
1359 | flags |= TNF_MIGRATED; | 1351 | flags |= TNF_MIGRATED; |
1360 | page_nid = target_nid; | 1352 | page_nid = target_nid; |
1361 | } | 1353 | } else |
1354 | flags |= TNF_MIGRATE_FAIL; | ||
1362 | 1355 | ||
1363 | goto out; | 1356 | goto out; |
1364 | clear_pmdnuma: | 1357 | clear_pmdnuma: |
1365 | BUG_ON(!PageLocked(page)); | 1358 | BUG_ON(!PageLocked(page)); |
1359 | was_writable = pmd_write(pmd); | ||
1366 | pmd = pmd_modify(pmd, vma->vm_page_prot); | 1360 | pmd = pmd_modify(pmd, vma->vm_page_prot); |
1361 | pmd = pmd_mkyoung(pmd); | ||
1362 | if (was_writable) | ||
1363 | pmd = pmd_mkwrite(pmd); | ||
1367 | set_pmd_at(mm, haddr, pmdp, pmd); | 1364 | set_pmd_at(mm, haddr, pmdp, pmd); |
1368 | update_mmu_cache_pmd(vma, addr, pmdp); | 1365 | update_mmu_cache_pmd(vma, addr, pmdp); |
1369 | unlock_page(page); | 1366 | unlock_page(page); |
@@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1487 | 1484 | ||
1488 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { | 1485 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
1489 | pmd_t entry; | 1486 | pmd_t entry; |
1487 | bool preserve_write = prot_numa && pmd_write(*pmd); | ||
1490 | ret = 1; | 1488 | ret = 1; |
1491 | 1489 | ||
1492 | /* | 1490 | /* |
@@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1502 | if (!prot_numa || !pmd_protnone(*pmd)) { | 1500 | if (!prot_numa || !pmd_protnone(*pmd)) { |
1503 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); | 1501 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); |
1504 | entry = pmd_modify(entry, newprot); | 1502 | entry = pmd_modify(entry, newprot); |
1503 | if (preserve_write) | ||
1504 | entry = pmd_mkwrite(entry); | ||
1505 | ret = HPAGE_PMD_NR; | 1505 | ret = HPAGE_PMD_NR; |
1506 | set_pmd_at(mm, addr, pmd, entry); | 1506 | set_pmd_at(mm, addr, pmd, entry); |
1507 | BUG_ON(pmd_write(entry)); | 1507 | BUG_ON(!preserve_write && pmd_write(entry)); |
1508 | } | 1508 | } |
1509 | spin_unlock(ptl); | 1509 | spin_unlock(ptl); |
1510 | } | 1510 | } |
diff --git a/mm/memory.c b/mm/memory.c index 411144f977b1..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3035 | int last_cpupid; | 3035 | int last_cpupid; |
3036 | int target_nid; | 3036 | int target_nid; |
3037 | bool migrated = false; | 3037 | bool migrated = false; |
3038 | bool was_writable = pte_write(pte); | ||
3038 | int flags = 0; | 3039 | int flags = 0; |
3039 | 3040 | ||
3040 | /* A PROT_NONE fault should not end up here */ | 3041 | /* A PROT_NONE fault should not end up here */ |
@@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3059 | /* Make it present again */ | 3060 | /* Make it present again */ |
3060 | pte = pte_modify(pte, vma->vm_page_prot); | 3061 | pte = pte_modify(pte, vma->vm_page_prot); |
3061 | pte = pte_mkyoung(pte); | 3062 | pte = pte_mkyoung(pte); |
3063 | if (was_writable) | ||
3064 | pte = pte_mkwrite(pte); | ||
3062 | set_pte_at(mm, addr, ptep, pte); | 3065 | set_pte_at(mm, addr, ptep, pte); |
3063 | update_mmu_cache(vma, addr, ptep); | 3066 | update_mmu_cache(vma, addr, ptep); |
3064 | 3067 | ||
@@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3069 | } | 3072 | } |
3070 | 3073 | ||
3071 | /* | 3074 | /* |
3072 | * Avoid grouping on DSO/COW pages in specific and RO pages | 3075 | * Avoid grouping on RO pages in general. RO pages shouldn't hurt as |
3073 | * in general, RO pages shouldn't hurt as much anyway since | 3076 | * much anyway since they can be in shared cache state. This misses |
3074 | * they can be in shared cache state. | 3077 | * the case where a mapping is writable but the process never writes |
3075 | * | 3078 | * to it but pte_write gets cleared during protection updates and |
3076 | * FIXME! This checks "pmd_dirty()" as an approximation of | 3079 | * pte_dirty has unpredictable behaviour between PTE scan updates, |
3077 | * "is this a read-only page", since checking "pmd_write()" | 3080 | * background writeback, dirty balancing and application behaviour. |
3078 | * is even more broken. We haven't actually turned this into | ||
3079 | * a writable page, so pmd_write() will always be false. | ||
3080 | */ | 3081 | */ |
3081 | if (!pte_dirty(pte)) | 3082 | if (!(vma->vm_flags & VM_WRITE)) |
3082 | flags |= TNF_NO_GROUP; | 3083 | flags |= TNF_NO_GROUP; |
3083 | 3084 | ||
3084 | /* | 3085 | /* |
@@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3102 | if (migrated) { | 3103 | if (migrated) { |
3103 | page_nid = target_nid; | 3104 | page_nid = target_nid; |
3104 | flags |= TNF_MIGRATED; | 3105 | flags |= TNF_MIGRATED; |
3105 | } | 3106 | } else |
3107 | flags |= TNF_MIGRATE_FAIL; | ||
3106 | 3108 | ||
3107 | out: | 3109 | out: |
3108 | if (page_nid != -1) | 3110 | if (page_nid != -1) |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795bea..65842d688b7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | |||
1092 | return NULL; | 1092 | return NULL; |
1093 | 1093 | ||
1094 | arch_refresh_nodedata(nid, pgdat); | 1094 | arch_refresh_nodedata(nid, pgdat); |
1095 | } else { | ||
1096 | /* Reset the nr_zones and classzone_idx to 0 before reuse */ | ||
1097 | pgdat->nr_zones = 0; | ||
1098 | pgdat->classzone_idx = 0; | ||
1095 | } | 1099 | } |
1096 | 1100 | ||
1097 | /* we can use NODE_DATA(nid) from here */ | 1101 | /* we can use NODE_DATA(nid) from here */ |
@@ -1977,15 +1981,6 @@ void try_offline_node(int nid) | |||
1977 | if (is_vmalloc_addr(zone->wait_table)) | 1981 | if (is_vmalloc_addr(zone->wait_table)) |
1978 | vfree(zone->wait_table); | 1982 | vfree(zone->wait_table); |
1979 | } | 1983 | } |
1980 | |||
1981 | /* | ||
1982 | * Since there is no way to guarentee the address of pgdat/zone is not | ||
1983 | * on stack of any kernel threads or used by other kernel objects | ||
1984 | * without reference counting or other symchronizing method, do not | ||
1985 | * reset node_data and free pgdat here. Just reset it to 0 and reuse | ||
1986 | * the memory when the node is online again. | ||
1987 | */ | ||
1988 | memset(pgdat, 0, sizeof(*pgdat)); | ||
1989 | } | 1984 | } |
1990 | EXPORT_SYMBOL(try_offline_node); | 1985 | EXPORT_SYMBOL(try_offline_node); |
1991 | 1986 | ||
@@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); | |||
774 | 774 | ||
775 | importer->anon_vma = exporter->anon_vma; | 775 | importer->anon_vma = exporter->anon_vma; |
776 | error = anon_vma_clone(importer, exporter); | 776 | error = anon_vma_clone(importer, exporter); |
777 | if (error) { | 777 | if (error) |
778 | importer->anon_vma = NULL; | ||
779 | return error; | 778 | return error; |
780 | } | ||
781 | } | 779 | } |
782 | } | 780 | } |
783 | 781 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 44727811bf4c..88584838e704 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
75 | oldpte = *pte; | 75 | oldpte = *pte; |
76 | if (pte_present(oldpte)) { | 76 | if (pte_present(oldpte)) { |
77 | pte_t ptent; | 77 | pte_t ptent; |
78 | bool preserve_write = prot_numa && pte_write(oldpte); | ||
78 | 79 | ||
79 | /* | 80 | /* |
80 | * Avoid trapping faults against the zero or KSM | 81 | * Avoid trapping faults against the zero or KSM |
@@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
94 | 95 | ||
95 | ptent = ptep_modify_prot_start(mm, addr, pte); | 96 | ptent = ptep_modify_prot_start(mm, addr, pte); |
96 | ptent = pte_modify(ptent, newprot); | 97 | ptent = pte_modify(ptent, newprot); |
98 | if (preserve_write) | ||
99 | ptent = pte_mkwrite(ptent); | ||
97 | 100 | ||
98 | /* Avoid taking write faults for known dirty pages */ | 101 | /* Avoid taking write faults for known dirty pages */ |
99 | if (dirty_accountable && pte_dirty(ptent) && | 102 | if (dirty_accountable && pte_dirty(ptent) && |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab3..755a42c76eb4 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) | |||
103 | 103 | ||
104 | if (!is_migrate_isolate_page(buddy)) { | 104 | if (!is_migrate_isolate_page(buddy)) { |
105 | __isolate_free_page(page, order); | 105 | __isolate_free_page(page, order); |
106 | kernel_map_pages(page, (1 << order), 1); | ||
106 | set_page_refcounted(page); | 107 | set_page_refcounted(page); |
107 | isolated_page = page; | 108 | isolated_page = page; |
108 | } | 109 | } |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f2878519..29f2f8b853ae 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, | |||
265 | vma = vma->vm_next; | 265 | vma = vma->vm_next; |
266 | 266 | ||
267 | err = walk_page_test(start, next, walk); | 267 | err = walk_page_test(start, next, walk); |
268 | if (err > 0) | 268 | if (err > 0) { |
269 | /* | ||
270 | * positive return values are purely for | ||
271 | * controlling the pagewalk, so should never | ||
272 | * be passed to the callers. | ||
273 | */ | ||
274 | err = 0; | ||
269 | continue; | 275 | continue; |
276 | } | ||
270 | if (err < 0) | 277 | if (err < 0) |
271 | break; | 278 | break; |
272 | } | 279 | } |
@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) | |||
287 | return 0; | 287 | return 0; |
288 | 288 | ||
289 | enomem_failure: | 289 | enomem_failure: |
290 | /* | ||
291 | * dst->anon_vma is dropped here otherwise its degree can be incorrectly | ||
292 | * decremented in unlink_anon_vmas(). | ||
293 | * We can safely do this because callers of anon_vma_clone() don't care | ||
294 | * about dst->anon_vma if anon_vma_clone() failed. | ||
295 | */ | ||
296 | dst->anon_vma = NULL; | ||
290 | unlink_anon_vmas(dst); | 297 | unlink_anon_vmas(dst); |
291 | return -ENOMEM; | 298 | return -ENOMEM; |
292 | } | 299 | } |
@@ -2449,7 +2449,8 @@ redo: | |||
2449 | do { | 2449 | do { |
2450 | tid = this_cpu_read(s->cpu_slab->tid); | 2450 | tid = this_cpu_read(s->cpu_slab->tid); |
2451 | c = raw_cpu_ptr(s->cpu_slab); | 2451 | c = raw_cpu_ptr(s->cpu_slab); |
2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
2453 | unlikely(tid != READ_ONCE(c->tid))); | ||
2453 | 2454 | ||
2454 | /* | 2455 | /* |
2455 | * Irqless object alloc/free algorithm used here depends on sequence | 2456 | * Irqless object alloc/free algorithm used here depends on sequence |
@@ -2718,7 +2719,8 @@ redo: | |||
2718 | do { | 2719 | do { |
2719 | tid = this_cpu_read(s->cpu_slab->tid); | 2720 | tid = this_cpu_read(s->cpu_slab->tid); |
2720 | c = raw_cpu_ptr(s->cpu_slab); | 2721 | c = raw_cpu_ptr(s->cpu_slab); |
2721 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2722 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
2723 | unlikely(tid != READ_ONCE(c->tid))); | ||
2722 | 2724 | ||
2723 | /* Same with comment on barrier() in slab_alloc_node() */ | 2725 | /* Same with comment on barrier() in slab_alloc_node() */ |
2724 | barrier(); | 2726 | barrier(); |