diff options
| -rw-r--r-- | MAINTAINERS | 13 | ||||
| -rw-r--r-- | drivers/rtc/rtc-mrst.c | 17 | ||||
| -rw-r--r-- | fs/affs/file.c | 19 | ||||
| -rw-r--r-- | fs/hfsplus/brec.c | 20 | ||||
| -rw-r--r-- | include/linux/sched.h | 9 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 8 | ||||
| -rw-r--r-- | mm/huge_memory.c | 26 | ||||
| -rw-r--r-- | mm/memory.c | 22 | ||||
| -rw-r--r-- | mm/memory_hotplug.c | 13 | ||||
| -rw-r--r-- | mm/mmap.c | 4 | ||||
| -rw-r--r-- | mm/mprotect.c | 3 | ||||
| -rw-r--r-- | mm/page_isolation.c | 1 | ||||
| -rw-r--r-- | mm/pagewalk.c | 9 | ||||
| -rw-r--r-- | mm/rmap.c | 7 | ||||
| -rw-r--r-- | mm/slub.c | 6 |
15 files changed, 106 insertions, 71 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 358eb0105e00..88c09ca2584f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -1186,7 +1186,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> | |||
| 1186 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 1186 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
| 1187 | S: Maintained | 1187 | S: Maintained |
| 1188 | F: arch/arm/mach-mvebu/ | 1188 | F: arch/arm/mach-mvebu/ |
| 1189 | F: drivers/rtc/armada38x-rtc | 1189 | F: drivers/rtc/rtc-armada38x.c |
| 1190 | 1190 | ||
| 1191 | ARM/Marvell Berlin SoC support | 1191 | ARM/Marvell Berlin SoC support |
| 1192 | M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> | 1192 | M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> |
| @@ -1675,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c | |||
| 1675 | F: include/linux/platform_data/at24.h | 1675 | F: include/linux/platform_data/at24.h |
| 1676 | 1676 | ||
| 1677 | ATA OVER ETHERNET (AOE) DRIVER | 1677 | ATA OVER ETHERNET (AOE) DRIVER |
| 1678 | M: "Ed L. Cashin" <ecashin@coraid.com> | 1678 | M: "Ed L. Cashin" <ed.cashin@acm.org> |
| 1679 | W: http://support.coraid.com/support/linux | 1679 | W: http://www.openaoe.org/ |
| 1680 | S: Supported | 1680 | S: Supported |
| 1681 | F: Documentation/aoe/ | 1681 | F: Documentation/aoe/ |
| 1682 | F: drivers/block/aoe/ | 1682 | F: drivers/block/aoe/ |
| @@ -3252,6 +3252,13 @@ S: Maintained | |||
| 3252 | F: Documentation/hwmon/dme1737 | 3252 | F: Documentation/hwmon/dme1737 |
| 3253 | F: drivers/hwmon/dme1737.c | 3253 | F: drivers/hwmon/dme1737.c |
| 3254 | 3254 | ||
| 3255 | DMI/SMBIOS SUPPORT | ||
| 3256 | M: Jean Delvare <jdelvare@suse.de> | ||
| 3257 | S: Maintained | ||
| 3258 | F: drivers/firmware/dmi-id.c | ||
| 3259 | F: drivers/firmware/dmi_scan.c | ||
| 3260 | F: include/linux/dmi.h | ||
| 3261 | |||
| 3255 | DOCKING STATION DRIVER | 3262 | DOCKING STATION DRIVER |
| 3256 | M: Shaohua Li <shaohua.li@intel.com> | 3263 | M: Shaohua Li <shaohua.li@intel.com> |
| 3257 | L: linux-acpi@vger.kernel.org | 3264 | L: linux-acpi@vger.kernel.org |
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index e2436d140175..3a6fd3a8a2ec 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c | |||
| @@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev) | |||
| 413 | mrst->dev = NULL; | 413 | mrst->dev = NULL; |
| 414 | } | 414 | } |
| 415 | 415 | ||
| 416 | #ifdef CONFIG_PM | 416 | #ifdef CONFIG_PM_SLEEP |
| 417 | static int mrst_suspend(struct device *dev, pm_message_t mesg) | 417 | static int mrst_suspend(struct device *dev) |
| 418 | { | 418 | { |
| 419 | struct mrst_rtc *mrst = dev_get_drvdata(dev); | 419 | struct mrst_rtc *mrst = dev_get_drvdata(dev); |
| 420 | unsigned char tmp; | 420 | unsigned char tmp; |
| @@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg) | |||
| 453 | */ | 453 | */ |
| 454 | static inline int mrst_poweroff(struct device *dev) | 454 | static inline int mrst_poweroff(struct device *dev) |
| 455 | { | 455 | { |
| 456 | return mrst_suspend(dev, PMSG_HIBERNATE); | 456 | return mrst_suspend(dev); |
| 457 | } | 457 | } |
| 458 | 458 | ||
| 459 | static int mrst_resume(struct device *dev) | 459 | static int mrst_resume(struct device *dev) |
| @@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev) | |||
| 490 | return 0; | 490 | return 0; |
| 491 | } | 491 | } |
| 492 | 492 | ||
| 493 | static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume); | ||
| 494 | #define MRST_PM_OPS (&mrst_pm_ops) | ||
| 495 | |||
| 493 | #else | 496 | #else |
| 494 | #define mrst_suspend NULL | 497 | #define MRST_PM_OPS NULL |
| 495 | #define mrst_resume NULL | ||
| 496 | 498 | ||
| 497 | static inline int mrst_poweroff(struct device *dev) | 499 | static inline int mrst_poweroff(struct device *dev) |
| 498 | { | 500 | { |
| @@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = { | |||
| 529 | .remove = vrtc_mrst_platform_remove, | 531 | .remove = vrtc_mrst_platform_remove, |
| 530 | .shutdown = vrtc_mrst_platform_shutdown, | 532 | .shutdown = vrtc_mrst_platform_shutdown, |
| 531 | .driver = { | 533 | .driver = { |
| 532 | .name = (char *) driver_name, | 534 | .name = driver_name, |
| 533 | .suspend = mrst_suspend, | 535 | .pm = MRST_PM_OPS, |
| 534 | .resume = mrst_resume, | ||
| 535 | } | 536 | } |
| 536 | }; | 537 | }; |
| 537 | 538 | ||
diff --git a/fs/affs/file.c b/fs/affs/file.c index d2468bf95669..a91795e01a7f 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
| @@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
| 699 | boff = tmp % bsize; | 699 | boff = tmp % bsize; |
| 700 | if (boff) { | 700 | if (boff) { |
| 701 | bh = affs_bread_ino(inode, bidx, 0); | 701 | bh = affs_bread_ino(inode, bidx, 0); |
| 702 | if (IS_ERR(bh)) | 702 | if (IS_ERR(bh)) { |
| 703 | return PTR_ERR(bh); | 703 | written = PTR_ERR(bh); |
| 704 | goto err_first_bh; | ||
| 705 | } | ||
| 704 | tmp = min(bsize - boff, to - from); | 706 | tmp = min(bsize - boff, to - from); |
| 705 | BUG_ON(boff + tmp > bsize || tmp > bsize); | 707 | BUG_ON(boff + tmp > bsize || tmp > bsize); |
| 706 | memcpy(AFFS_DATA(bh) + boff, data + from, tmp); | 708 | memcpy(AFFS_DATA(bh) + boff, data + from, tmp); |
| @@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
| 712 | bidx++; | 714 | bidx++; |
| 713 | } else if (bidx) { | 715 | } else if (bidx) { |
| 714 | bh = affs_bread_ino(inode, bidx - 1, 0); | 716 | bh = affs_bread_ino(inode, bidx - 1, 0); |
| 715 | if (IS_ERR(bh)) | 717 | if (IS_ERR(bh)) { |
| 716 | return PTR_ERR(bh); | 718 | written = PTR_ERR(bh); |
| 719 | goto err_first_bh; | ||
| 720 | } | ||
| 717 | } | 721 | } |
| 718 | while (from + bsize <= to) { | 722 | while (from + bsize <= to) { |
| 719 | prev_bh = bh; | 723 | prev_bh = bh; |
| 720 | bh = affs_getemptyblk_ino(inode, bidx); | 724 | bh = affs_getemptyblk_ino(inode, bidx); |
| 721 | if (IS_ERR(bh)) | 725 | if (IS_ERR(bh)) |
| 722 | goto out; | 726 | goto err_bh; |
| 723 | memcpy(AFFS_DATA(bh), data + from, bsize); | 727 | memcpy(AFFS_DATA(bh), data + from, bsize); |
| 724 | if (buffer_new(bh)) { | 728 | if (buffer_new(bh)) { |
| 725 | AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); | 729 | AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); |
| @@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, | |||
| 751 | prev_bh = bh; | 755 | prev_bh = bh; |
| 752 | bh = affs_bread_ino(inode, bidx, 1); | 756 | bh = affs_bread_ino(inode, bidx, 1); |
| 753 | if (IS_ERR(bh)) | 757 | if (IS_ERR(bh)) |
| 754 | goto out; | 758 | goto err_bh; |
| 755 | tmp = min(bsize, to - from); | 759 | tmp = min(bsize, to - from); |
| 756 | BUG_ON(tmp > bsize); | 760 | BUG_ON(tmp > bsize); |
| 757 | memcpy(AFFS_DATA(bh), data + from, tmp); | 761 | memcpy(AFFS_DATA(bh), data + from, tmp); |
| @@ -790,12 +794,13 @@ done: | |||
| 790 | if (tmp > inode->i_size) | 794 | if (tmp > inode->i_size) |
| 791 | inode->i_size = AFFS_I(inode)->mmu_private = tmp; | 795 | inode->i_size = AFFS_I(inode)->mmu_private = tmp; |
| 792 | 796 | ||
| 797 | err_first_bh: | ||
| 793 | unlock_page(page); | 798 | unlock_page(page); |
| 794 | page_cache_release(page); | 799 | page_cache_release(page); |
| 795 | 800 | ||
| 796 | return written; | 801 | return written; |
| 797 | 802 | ||
| 798 | out: | 803 | err_bh: |
| 799 | bh = prev_bh; | 804 | bh = prev_bh; |
| 800 | if (!written) | 805 | if (!written) |
| 801 | written = PTR_ERR(bh); | 806 | written = PTR_ERR(bh); |
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
| @@ -131,13 +131,16 @@ skip: | |||
| 131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); | 131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); |
| 132 | hfs_bnode_dump(node); | 132 | hfs_bnode_dump(node); |
| 133 | 133 | ||
| 134 | if (new_node) { | 134 | /* |
| 135 | /* update parent key if we inserted a key | 135 | * update parent key if we inserted a key |
| 136 | * at the start of the first node | 136 | * at the start of the node and it is not the new node |
| 137 | */ | 137 | */ |
| 138 | if (!rec && new_node != node) | 138 | if (!rec && new_node != node) { |
| 139 | hfs_brec_update_parent(fd); | 139 | hfs_bnode_read_key(node, fd->search_key, data_off + size); |
| 140 | hfs_brec_update_parent(fd); | ||
| 141 | } | ||
| 140 | 142 | ||
| 143 | if (new_node) { | ||
| 141 | hfs_bnode_put(fd->bnode); | 144 | hfs_bnode_put(fd->bnode); |
| 142 | if (!new_node->parent) { | 145 | if (!new_node->parent) { |
| 143 | hfs_btree_inc_height(tree); | 146 | hfs_btree_inc_height(tree); |
| @@ -168,9 +171,6 @@ skip: | |||
| 168 | goto again; | 171 | goto again; |
| 169 | } | 172 | } |
| 170 | 173 | ||
| 171 | if (!rec) | ||
| 172 | hfs_brec_update_parent(fd); | ||
| 173 | |||
| 174 | return 0; | 174 | return 0; |
| 175 | } | 175 | } |
| 176 | 176 | ||
| @@ -370,6 +370,8 @@ again: | |||
| 370 | if (IS_ERR(parent)) | 370 | if (IS_ERR(parent)) |
| 371 | return PTR_ERR(parent); | 371 | return PTR_ERR(parent); |
| 372 | __hfs_brec_find(parent, fd, hfs_find_rec_by_key); | 372 | __hfs_brec_find(parent, fd, hfs_find_rec_by_key); |
| 373 | if (fd->record < 0) | ||
| 374 | return -ENOENT; | ||
| 373 | hfs_bnode_dump(parent); | 375 | hfs_bnode_dump(parent); |
| 374 | rec = fd->record; | 376 | rec = fd->record; |
| 375 | 377 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..a419b65770d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -1625,11 +1625,11 @@ struct task_struct { | |||
| 1625 | 1625 | ||
| 1626 | /* | 1626 | /* |
| 1627 | * numa_faults_locality tracks if faults recorded during the last | 1627 | * numa_faults_locality tracks if faults recorded during the last |
| 1628 | * scan window were remote/local. The task scan period is adapted | 1628 | * scan window were remote/local or failed to migrate. The task scan |
| 1629 | * based on the locality of the faults with different weights | 1629 | * period is adapted based on the locality of the faults with different |
| 1630 | * depending on whether they were shared or private faults | 1630 | * weights depending on whether they were shared or private faults |
| 1631 | */ | 1631 | */ |
| 1632 | unsigned long numa_faults_locality[2]; | 1632 | unsigned long numa_faults_locality[3]; |
| 1633 | 1633 | ||
| 1634 | unsigned long numa_pages_migrated; | 1634 | unsigned long numa_pages_migrated; |
| 1635 | #endif /* CONFIG_NUMA_BALANCING */ | 1635 | #endif /* CONFIG_NUMA_BALANCING */ |
| @@ -1719,6 +1719,7 @@ struct task_struct { | |||
| 1719 | #define TNF_NO_GROUP 0x02 | 1719 | #define TNF_NO_GROUP 0x02 |
| 1720 | #define TNF_SHARED 0x04 | 1720 | #define TNF_SHARED 0x04 |
| 1721 | #define TNF_FAULT_LOCAL 0x08 | 1721 | #define TNF_FAULT_LOCAL 0x08 |
| 1722 | #define TNF_MIGRATE_FAIL 0x10 | ||
| 1722 | 1723 | ||
| 1723 | #ifdef CONFIG_NUMA_BALANCING | 1724 | #ifdef CONFIG_NUMA_BALANCING |
| 1724 | extern void task_numa_fault(int last_node, int node, int pages, int flags); | 1725 | extern void task_numa_fault(int last_node, int node, int pages, int flags); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7ce18f3c097a..bcfe32088b37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p, | |||
| 1609 | /* | 1609 | /* |
| 1610 | * If there were no record hinting faults then either the task is | 1610 | * If there were no record hinting faults then either the task is |
| 1611 | * completely idle or all activity is areas that are not of interest | 1611 | * completely idle or all activity is areas that are not of interest |
| 1612 | * to automatic numa balancing. Scan slower | 1612 | * to automatic numa balancing. Related to that, if there were failed |
| 1613 | * migration then it implies we are migrating too quickly or the local | ||
| 1614 | * node is overloaded. In either case, scan slower | ||
| 1613 | */ | 1615 | */ |
| 1614 | if (local + shared == 0) { | 1616 | if (local + shared == 0 || p->numa_faults_locality[2]) { |
| 1615 | p->numa_scan_period = min(p->numa_scan_period_max, | 1617 | p->numa_scan_period = min(p->numa_scan_period_max, |
| 1616 | p->numa_scan_period << 1); | 1618 | p->numa_scan_period << 1); |
| 1617 | 1619 | ||
| @@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
| 2080 | 2082 | ||
| 2081 | if (migrated) | 2083 | if (migrated) |
| 2082 | p->numa_pages_migrated += pages; | 2084 | p->numa_pages_migrated += pages; |
| 2085 | if (flags & TNF_MIGRATE_FAIL) | ||
| 2086 | p->numa_faults_locality[2] += pages; | ||
| 2083 | 2087 | ||
| 2084 | p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; | 2088 | p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; |
| 2085 | p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; | 2089 | p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 626e93db28ba..6817b0350c71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1260 | int target_nid, last_cpupid = -1; | 1260 | int target_nid, last_cpupid = -1; |
| 1261 | bool page_locked; | 1261 | bool page_locked; |
| 1262 | bool migrated = false; | 1262 | bool migrated = false; |
| 1263 | bool was_writable; | ||
| 1263 | int flags = 0; | 1264 | int flags = 0; |
| 1264 | 1265 | ||
| 1265 | /* A PROT_NONE fault should not end up here */ | 1266 | /* A PROT_NONE fault should not end up here */ |
| @@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1291 | flags |= TNF_FAULT_LOCAL; | 1292 | flags |= TNF_FAULT_LOCAL; |
| 1292 | } | 1293 | } |
| 1293 | 1294 | ||
| 1294 | /* | 1295 | /* See similar comment in do_numa_page for explanation */ |
| 1295 | * Avoid grouping on DSO/COW pages in specific and RO pages | 1296 | if (!(vma->vm_flags & VM_WRITE)) |
| 1296 | * in general, RO pages shouldn't hurt as much anyway since | ||
| 1297 | * they can be in shared cache state. | ||
| 1298 | * | ||
| 1299 | * FIXME! This checks "pmd_dirty()" as an approximation of | ||
| 1300 | * "is this a read-only page", since checking "pmd_write()" | ||
| 1301 | * is even more broken. We haven't actually turned this into | ||
| 1302 | * a writable page, so pmd_write() will always be false. | ||
| 1303 | */ | ||
| 1304 | if (!pmd_dirty(pmd)) | ||
| 1305 | flags |= TNF_NO_GROUP; | 1297 | flags |= TNF_NO_GROUP; |
| 1306 | 1298 | ||
| 1307 | /* | 1299 | /* |
| @@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1358 | if (migrated) { | 1350 | if (migrated) { |
| 1359 | flags |= TNF_MIGRATED; | 1351 | flags |= TNF_MIGRATED; |
| 1360 | page_nid = target_nid; | 1352 | page_nid = target_nid; |
| 1361 | } | 1353 | } else |
| 1354 | flags |= TNF_MIGRATE_FAIL; | ||
| 1362 | 1355 | ||
| 1363 | goto out; | 1356 | goto out; |
| 1364 | clear_pmdnuma: | 1357 | clear_pmdnuma: |
| 1365 | BUG_ON(!PageLocked(page)); | 1358 | BUG_ON(!PageLocked(page)); |
| 1359 | was_writable = pmd_write(pmd); | ||
| 1366 | pmd = pmd_modify(pmd, vma->vm_page_prot); | 1360 | pmd = pmd_modify(pmd, vma->vm_page_prot); |
| 1361 | pmd = pmd_mkyoung(pmd); | ||
| 1362 | if (was_writable) | ||
| 1363 | pmd = pmd_mkwrite(pmd); | ||
| 1367 | set_pmd_at(mm, haddr, pmdp, pmd); | 1364 | set_pmd_at(mm, haddr, pmdp, pmd); |
| 1368 | update_mmu_cache_pmd(vma, addr, pmdp); | 1365 | update_mmu_cache_pmd(vma, addr, pmdp); |
| 1369 | unlock_page(page); | 1366 | unlock_page(page); |
| @@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 1487 | 1484 | ||
| 1488 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { | 1485 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
| 1489 | pmd_t entry; | 1486 | pmd_t entry; |
| 1487 | bool preserve_write = prot_numa && pmd_write(*pmd); | ||
| 1490 | ret = 1; | 1488 | ret = 1; |
| 1491 | 1489 | ||
| 1492 | /* | 1490 | /* |
| @@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 1502 | if (!prot_numa || !pmd_protnone(*pmd)) { | 1500 | if (!prot_numa || !pmd_protnone(*pmd)) { |
| 1503 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); | 1501 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); |
| 1504 | entry = pmd_modify(entry, newprot); | 1502 | entry = pmd_modify(entry, newprot); |
| 1503 | if (preserve_write) | ||
| 1504 | entry = pmd_mkwrite(entry); | ||
| 1505 | ret = HPAGE_PMD_NR; | 1505 | ret = HPAGE_PMD_NR; |
| 1506 | set_pmd_at(mm, addr, pmd, entry); | 1506 | set_pmd_at(mm, addr, pmd, entry); |
| 1507 | BUG_ON(pmd_write(entry)); | 1507 | BUG_ON(!preserve_write && pmd_write(entry)); |
| 1508 | } | 1508 | } |
| 1509 | spin_unlock(ptl); | 1509 | spin_unlock(ptl); |
| 1510 | } | 1510 | } |
diff --git a/mm/memory.c b/mm/memory.c index 411144f977b1..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3035 | int last_cpupid; | 3035 | int last_cpupid; |
| 3036 | int target_nid; | 3036 | int target_nid; |
| 3037 | bool migrated = false; | 3037 | bool migrated = false; |
| 3038 | bool was_writable = pte_write(pte); | ||
| 3038 | int flags = 0; | 3039 | int flags = 0; |
| 3039 | 3040 | ||
| 3040 | /* A PROT_NONE fault should not end up here */ | 3041 | /* A PROT_NONE fault should not end up here */ |
| @@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3059 | /* Make it present again */ | 3060 | /* Make it present again */ |
| 3060 | pte = pte_modify(pte, vma->vm_page_prot); | 3061 | pte = pte_modify(pte, vma->vm_page_prot); |
| 3061 | pte = pte_mkyoung(pte); | 3062 | pte = pte_mkyoung(pte); |
| 3063 | if (was_writable) | ||
| 3064 | pte = pte_mkwrite(pte); | ||
| 3062 | set_pte_at(mm, addr, ptep, pte); | 3065 | set_pte_at(mm, addr, ptep, pte); |
| 3063 | update_mmu_cache(vma, addr, ptep); | 3066 | update_mmu_cache(vma, addr, ptep); |
| 3064 | 3067 | ||
| @@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3069 | } | 3072 | } |
| 3070 | 3073 | ||
| 3071 | /* | 3074 | /* |
| 3072 | * Avoid grouping on DSO/COW pages in specific and RO pages | 3075 | * Avoid grouping on RO pages in general. RO pages shouldn't hurt as |
| 3073 | * in general, RO pages shouldn't hurt as much anyway since | 3076 | * much anyway since they can be in shared cache state. This misses |
| 3074 | * they can be in shared cache state. | 3077 | * the case where a mapping is writable but the process never writes |
| 3075 | * | 3078 | * to it but pte_write gets cleared during protection updates and |
| 3076 | * FIXME! This checks "pmd_dirty()" as an approximation of | 3079 | * pte_dirty has unpredictable behaviour between PTE scan updates, |
| 3077 | * "is this a read-only page", since checking "pmd_write()" | 3080 | * background writeback, dirty balancing and application behaviour. |
| 3078 | * is even more broken. We haven't actually turned this into | ||
| 3079 | * a writable page, so pmd_write() will always be false. | ||
| 3080 | */ | 3081 | */ |
| 3081 | if (!pte_dirty(pte)) | 3082 | if (!(vma->vm_flags & VM_WRITE)) |
| 3082 | flags |= TNF_NO_GROUP; | 3083 | flags |= TNF_NO_GROUP; |
| 3083 | 3084 | ||
| 3084 | /* | 3085 | /* |
| @@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3102 | if (migrated) { | 3103 | if (migrated) { |
| 3103 | page_nid = target_nid; | 3104 | page_nid = target_nid; |
| 3104 | flags |= TNF_MIGRATED; | 3105 | flags |= TNF_MIGRATED; |
| 3105 | } | 3106 | } else |
| 3107 | flags |= TNF_MIGRATE_FAIL; | ||
| 3106 | 3108 | ||
| 3107 | out: | 3109 | out: |
| 3108 | if (page_nid != -1) | 3110 | if (page_nid != -1) |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795bea..65842d688b7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | |||
| 1092 | return NULL; | 1092 | return NULL; |
| 1093 | 1093 | ||
| 1094 | arch_refresh_nodedata(nid, pgdat); | 1094 | arch_refresh_nodedata(nid, pgdat); |
| 1095 | } else { | ||
| 1096 | /* Reset the nr_zones and classzone_idx to 0 before reuse */ | ||
| 1097 | pgdat->nr_zones = 0; | ||
| 1098 | pgdat->classzone_idx = 0; | ||
| 1095 | } | 1099 | } |
| 1096 | 1100 | ||
| 1097 | /* we can use NODE_DATA(nid) from here */ | 1101 | /* we can use NODE_DATA(nid) from here */ |
| @@ -1977,15 +1981,6 @@ void try_offline_node(int nid) | |||
| 1977 | if (is_vmalloc_addr(zone->wait_table)) | 1981 | if (is_vmalloc_addr(zone->wait_table)) |
| 1978 | vfree(zone->wait_table); | 1982 | vfree(zone->wait_table); |
| 1979 | } | 1983 | } |
| 1980 | |||
| 1981 | /* | ||
| 1982 | * Since there is no way to guarentee the address of pgdat/zone is not | ||
| 1983 | * on stack of any kernel threads or used by other kernel objects | ||
| 1984 | * without reference counting or other symchronizing method, do not | ||
| 1985 | * reset node_data and free pgdat here. Just reset it to 0 and reuse | ||
| 1986 | * the memory when the node is online again. | ||
| 1987 | */ | ||
| 1988 | memset(pgdat, 0, sizeof(*pgdat)); | ||
| 1989 | } | 1984 | } |
| 1990 | EXPORT_SYMBOL(try_offline_node); | 1985 | EXPORT_SYMBOL(try_offline_node); |
| 1991 | 1986 | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
| @@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); | |||
| 774 | 774 | ||
| 775 | importer->anon_vma = exporter->anon_vma; | 775 | importer->anon_vma = exporter->anon_vma; |
| 776 | error = anon_vma_clone(importer, exporter); | 776 | error = anon_vma_clone(importer, exporter); |
| 777 | if (error) { | 777 | if (error) |
| 778 | importer->anon_vma = NULL; | ||
| 779 | return error; | 778 | return error; |
| 780 | } | ||
| 781 | } | 779 | } |
| 782 | } | 780 | } |
| 783 | 781 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 44727811bf4c..88584838e704 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
| @@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 75 | oldpte = *pte; | 75 | oldpte = *pte; |
| 76 | if (pte_present(oldpte)) { | 76 | if (pte_present(oldpte)) { |
| 77 | pte_t ptent; | 77 | pte_t ptent; |
| 78 | bool preserve_write = prot_numa && pte_write(oldpte); | ||
| 78 | 79 | ||
| 79 | /* | 80 | /* |
| 80 | * Avoid trapping faults against the zero or KSM | 81 | * Avoid trapping faults against the zero or KSM |
| @@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 94 | 95 | ||
| 95 | ptent = ptep_modify_prot_start(mm, addr, pte); | 96 | ptent = ptep_modify_prot_start(mm, addr, pte); |
| 96 | ptent = pte_modify(ptent, newprot); | 97 | ptent = pte_modify(ptent, newprot); |
| 98 | if (preserve_write) | ||
| 99 | ptent = pte_mkwrite(ptent); | ||
| 97 | 100 | ||
| 98 | /* Avoid taking write faults for known dirty pages */ | 101 | /* Avoid taking write faults for known dirty pages */ |
| 99 | if (dirty_accountable && pte_dirty(ptent) && | 102 | if (dirty_accountable && pte_dirty(ptent) && |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab3..755a42c76eb4 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
| @@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) | |||
| 103 | 103 | ||
| 104 | if (!is_migrate_isolate_page(buddy)) { | 104 | if (!is_migrate_isolate_page(buddy)) { |
| 105 | __isolate_free_page(page, order); | 105 | __isolate_free_page(page, order); |
| 106 | kernel_map_pages(page, (1 << order), 1); | ||
| 106 | set_page_refcounted(page); | 107 | set_page_refcounted(page); |
| 107 | isolated_page = page; | 108 | isolated_page = page; |
| 108 | } | 109 | } |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f2878519..29f2f8b853ae 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
| @@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, | |||
| 265 | vma = vma->vm_next; | 265 | vma = vma->vm_next; |
| 266 | 266 | ||
| 267 | err = walk_page_test(start, next, walk); | 267 | err = walk_page_test(start, next, walk); |
| 268 | if (err > 0) | 268 | if (err > 0) { |
| 269 | /* | ||
| 270 | * positive return values are purely for | ||
| 271 | * controlling the pagewalk, so should never | ||
| 272 | * be passed to the callers. | ||
| 273 | */ | ||
| 274 | err = 0; | ||
| 269 | continue; | 275 | continue; |
| 276 | } | ||
| 270 | if (err < 0) | 277 | if (err < 0) |
| 271 | break; | 278 | break; |
| 272 | } | 279 | } |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
| @@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) | |||
| 287 | return 0; | 287 | return 0; |
| 288 | 288 | ||
| 289 | enomem_failure: | 289 | enomem_failure: |
| 290 | /* | ||
| 291 | * dst->anon_vma is dropped here otherwise its degree can be incorrectly | ||
| 292 | * decremented in unlink_anon_vmas(). | ||
| 293 | * We can safely do this because callers of anon_vma_clone() don't care | ||
| 294 | * about dst->anon_vma if anon_vma_clone() failed. | ||
| 295 | */ | ||
| 296 | dst->anon_vma = NULL; | ||
| 290 | unlink_anon_vmas(dst); | 297 | unlink_anon_vmas(dst); |
| 291 | return -ENOMEM; | 298 | return -ENOMEM; |
| 292 | } | 299 | } |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
| @@ -2449,7 +2449,8 @@ redo: | |||
| 2449 | do { | 2449 | do { |
| 2450 | tid = this_cpu_read(s->cpu_slab->tid); | 2450 | tid = this_cpu_read(s->cpu_slab->tid); |
| 2451 | c = raw_cpu_ptr(s->cpu_slab); | 2451 | c = raw_cpu_ptr(s->cpu_slab); |
| 2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
| 2453 | unlikely(tid != READ_ONCE(c->tid))); | ||
| 2453 | 2454 | ||
| 2454 | /* | 2455 | /* |
| 2455 | * Irqless object alloc/free algorithm used here depends on sequence | 2456 | * Irqless object alloc/free algorithm used here depends on sequence |
| @@ -2718,7 +2719,8 @@ redo: | |||
| 2718 | do { | 2719 | do { |
| 2719 | tid = this_cpu_read(s->cpu_slab->tid); | 2720 | tid = this_cpu_read(s->cpu_slab->tid); |
| 2720 | c = raw_cpu_ptr(s->cpu_slab); | 2721 | c = raw_cpu_ptr(s->cpu_slab); |
| 2721 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2722 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
| 2723 | unlikely(tid != READ_ONCE(c->tid))); | ||
| 2722 | 2724 | ||
| 2723 | /* Same with comment on barrier() in slab_alloc_node() */ | 2725 | /* Same with comment on barrier() in slab_alloc_node() */ |
| 2724 | barrier(); | 2726 | barrier(); |
