diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-25 19:21:17 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-25 19:21:17 -0400 |
| commit | 9c8e30d12d46461c283608d3123debc4e75d4f8b (patch) | |
| tree | c354074a091c9bacc700a2dc861085da8a7e0385 /mm | |
| parent | b8517e98305e3c76fa293133826afe39a690edcd (diff) | |
| parent | b7b04004ecd9e58cdc6c6ff92f251d5ac5c0adb2 (diff) | |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"15 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm: numa: mark huge PTEs young when clearing NUMA hinting faults
mm: numa: slow PTE scan rate if migration failures occur
mm: numa: preserve PTE write permissions across a NUMA hinting fault
mm: numa: group related processes based on VMA flags instead of page table flags
hfsplus: fix B-tree corruption after insertion at position 0
MAINTAINERS: add Jan as DMI/SMBIOS support maintainer
fs/affs/file.c: unlock/release page on error
mm/page_alloc.c: call kernel_map_pages in unset_migratetype_isolate
mm/slub: fix lockups on PREEMPT && !SMP kernels
mm/memory hotplug: postpone the reset of obsolete pgdat
MAINTAINERS: correct rtc armada38x pattern entry
mm/pagewalk.c: prevent positive return value of walk_page_test() from being passed to callers
mm: fix anon_vma->degree underflow in anon_vma endless growing prevention
drivers/rtc/rtc-mrst: fix suspend/resume
aoe: update aoe maintainer information
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/huge_memory.c | 26 | ||||
| -rw-r--r-- | mm/memory.c | 22 | ||||
| -rw-r--r-- | mm/memory_hotplug.c | 13 | ||||
| -rw-r--r-- | mm/mmap.c | 4 | ||||
| -rw-r--r-- | mm/mprotect.c | 3 | ||||
| -rw-r--r-- | mm/page_isolation.c | 1 | ||||
| -rw-r--r-- | mm/pagewalk.c | 9 | ||||
| -rw-r--r-- | mm/rmap.c | 7 | ||||
| -rw-r--r-- | mm/slub.c | 6 |
9 files changed, 53 insertions, 38 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 626e93db28ba..6817b0350c71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1260 | int target_nid, last_cpupid = -1; | 1260 | int target_nid, last_cpupid = -1; |
| 1261 | bool page_locked; | 1261 | bool page_locked; |
| 1262 | bool migrated = false; | 1262 | bool migrated = false; |
| 1263 | bool was_writable; | ||
| 1263 | int flags = 0; | 1264 | int flags = 0; |
| 1264 | 1265 | ||
| 1265 | /* A PROT_NONE fault should not end up here */ | 1266 | /* A PROT_NONE fault should not end up here */ |
| @@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1291 | flags |= TNF_FAULT_LOCAL; | 1292 | flags |= TNF_FAULT_LOCAL; |
| 1292 | } | 1293 | } |
| 1293 | 1294 | ||
| 1294 | /* | 1295 | /* See similar comment in do_numa_page for explanation */ |
| 1295 | * Avoid grouping on DSO/COW pages in specific and RO pages | 1296 | if (!(vma->vm_flags & VM_WRITE)) |
| 1296 | * in general, RO pages shouldn't hurt as much anyway since | ||
| 1297 | * they can be in shared cache state. | ||
| 1298 | * | ||
| 1299 | * FIXME! This checks "pmd_dirty()" as an approximation of | ||
| 1300 | * "is this a read-only page", since checking "pmd_write()" | ||
| 1301 | * is even more broken. We haven't actually turned this into | ||
| 1302 | * a writable page, so pmd_write() will always be false. | ||
| 1303 | */ | ||
| 1304 | if (!pmd_dirty(pmd)) | ||
| 1305 | flags |= TNF_NO_GROUP; | 1297 | flags |= TNF_NO_GROUP; |
| 1306 | 1298 | ||
| 1307 | /* | 1299 | /* |
| @@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1358 | if (migrated) { | 1350 | if (migrated) { |
| 1359 | flags |= TNF_MIGRATED; | 1351 | flags |= TNF_MIGRATED; |
| 1360 | page_nid = target_nid; | 1352 | page_nid = target_nid; |
| 1361 | } | 1353 | } else |
| 1354 | flags |= TNF_MIGRATE_FAIL; | ||
| 1362 | 1355 | ||
| 1363 | goto out; | 1356 | goto out; |
| 1364 | clear_pmdnuma: | 1357 | clear_pmdnuma: |
| 1365 | BUG_ON(!PageLocked(page)); | 1358 | BUG_ON(!PageLocked(page)); |
| 1359 | was_writable = pmd_write(pmd); | ||
| 1366 | pmd = pmd_modify(pmd, vma->vm_page_prot); | 1360 | pmd = pmd_modify(pmd, vma->vm_page_prot); |
| 1361 | pmd = pmd_mkyoung(pmd); | ||
| 1362 | if (was_writable) | ||
| 1363 | pmd = pmd_mkwrite(pmd); | ||
| 1367 | set_pmd_at(mm, haddr, pmdp, pmd); | 1364 | set_pmd_at(mm, haddr, pmdp, pmd); |
| 1368 | update_mmu_cache_pmd(vma, addr, pmdp); | 1365 | update_mmu_cache_pmd(vma, addr, pmdp); |
| 1369 | unlock_page(page); | 1366 | unlock_page(page); |
| @@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 1487 | 1484 | ||
| 1488 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { | 1485 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
| 1489 | pmd_t entry; | 1486 | pmd_t entry; |
| 1487 | bool preserve_write = prot_numa && pmd_write(*pmd); | ||
| 1490 | ret = 1; | 1488 | ret = 1; |
| 1491 | 1489 | ||
| 1492 | /* | 1490 | /* |
| @@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 1502 | if (!prot_numa || !pmd_protnone(*pmd)) { | 1500 | if (!prot_numa || !pmd_protnone(*pmd)) { |
| 1503 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); | 1501 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); |
| 1504 | entry = pmd_modify(entry, newprot); | 1502 | entry = pmd_modify(entry, newprot); |
| 1503 | if (preserve_write) | ||
| 1504 | entry = pmd_mkwrite(entry); | ||
| 1505 | ret = HPAGE_PMD_NR; | 1505 | ret = HPAGE_PMD_NR; |
| 1506 | set_pmd_at(mm, addr, pmd, entry); | 1506 | set_pmd_at(mm, addr, pmd, entry); |
| 1507 | BUG_ON(pmd_write(entry)); | 1507 | BUG_ON(!preserve_write && pmd_write(entry)); |
| 1508 | } | 1508 | } |
| 1509 | spin_unlock(ptl); | 1509 | spin_unlock(ptl); |
| 1510 | } | 1510 | } |
diff --git a/mm/memory.c b/mm/memory.c index 411144f977b1..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3035 | int last_cpupid; | 3035 | int last_cpupid; |
| 3036 | int target_nid; | 3036 | int target_nid; |
| 3037 | bool migrated = false; | 3037 | bool migrated = false; |
| 3038 | bool was_writable = pte_write(pte); | ||
| 3038 | int flags = 0; | 3039 | int flags = 0; |
| 3039 | 3040 | ||
| 3040 | /* A PROT_NONE fault should not end up here */ | 3041 | /* A PROT_NONE fault should not end up here */ |
| @@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3059 | /* Make it present again */ | 3060 | /* Make it present again */ |
| 3060 | pte = pte_modify(pte, vma->vm_page_prot); | 3061 | pte = pte_modify(pte, vma->vm_page_prot); |
| 3061 | pte = pte_mkyoung(pte); | 3062 | pte = pte_mkyoung(pte); |
| 3063 | if (was_writable) | ||
| 3064 | pte = pte_mkwrite(pte); | ||
| 3062 | set_pte_at(mm, addr, ptep, pte); | 3065 | set_pte_at(mm, addr, ptep, pte); |
| 3063 | update_mmu_cache(vma, addr, ptep); | 3066 | update_mmu_cache(vma, addr, ptep); |
| 3064 | 3067 | ||
| @@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3069 | } | 3072 | } |
| 3070 | 3073 | ||
| 3071 | /* | 3074 | /* |
| 3072 | * Avoid grouping on DSO/COW pages in specific and RO pages | 3075 | * Avoid grouping on RO pages in general. RO pages shouldn't hurt as |
| 3073 | * in general, RO pages shouldn't hurt as much anyway since | 3076 | * much anyway since they can be in shared cache state. This misses |
| 3074 | * they can be in shared cache state. | 3077 | * the case where a mapping is writable but the process never writes |
| 3075 | * | 3078 | * to it but pte_write gets cleared during protection updates and |
| 3076 | * FIXME! This checks "pmd_dirty()" as an approximation of | 3079 | * pte_dirty has unpredictable behaviour between PTE scan updates, |
| 3077 | * "is this a read-only page", since checking "pmd_write()" | 3080 | * background writeback, dirty balancing and application behaviour. |
| 3078 | * is even more broken. We haven't actually turned this into | ||
| 3079 | * a writable page, so pmd_write() will always be false. | ||
| 3080 | */ | 3081 | */ |
| 3081 | if (!pte_dirty(pte)) | 3082 | if (!(vma->vm_flags & VM_WRITE)) |
| 3082 | flags |= TNF_NO_GROUP; | 3083 | flags |= TNF_NO_GROUP; |
| 3083 | 3084 | ||
| 3084 | /* | 3085 | /* |
| @@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 3102 | if (migrated) { | 3103 | if (migrated) { |
| 3103 | page_nid = target_nid; | 3104 | page_nid = target_nid; |
| 3104 | flags |= TNF_MIGRATED; | 3105 | flags |= TNF_MIGRATED; |
| 3105 | } | 3106 | } else |
| 3107 | flags |= TNF_MIGRATE_FAIL; | ||
| 3106 | 3108 | ||
| 3107 | out: | 3109 | out: |
| 3108 | if (page_nid != -1) | 3110 | if (page_nid != -1) |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795bea..65842d688b7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
| @@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | |||
| 1092 | return NULL; | 1092 | return NULL; |
| 1093 | 1093 | ||
| 1094 | arch_refresh_nodedata(nid, pgdat); | 1094 | arch_refresh_nodedata(nid, pgdat); |
| 1095 | } else { | ||
| 1096 | /* Reset the nr_zones and classzone_idx to 0 before reuse */ | ||
| 1097 | pgdat->nr_zones = 0; | ||
| 1098 | pgdat->classzone_idx = 0; | ||
| 1095 | } | 1099 | } |
| 1096 | 1100 | ||
| 1097 | /* we can use NODE_DATA(nid) from here */ | 1101 | /* we can use NODE_DATA(nid) from here */ |
| @@ -1977,15 +1981,6 @@ void try_offline_node(int nid) | |||
| 1977 | if (is_vmalloc_addr(zone->wait_table)) | 1981 | if (is_vmalloc_addr(zone->wait_table)) |
| 1978 | vfree(zone->wait_table); | 1982 | vfree(zone->wait_table); |
| 1979 | } | 1983 | } |
| 1980 | |||
| 1981 | /* | ||
| 1982 | * Since there is no way to guarentee the address of pgdat/zone is not | ||
| 1983 | * on stack of any kernel threads or used by other kernel objects | ||
| 1984 | * without reference counting or other symchronizing method, do not | ||
| 1985 | * reset node_data and free pgdat here. Just reset it to 0 and reuse | ||
| 1986 | * the memory when the node is online again. | ||
| 1987 | */ | ||
| 1988 | memset(pgdat, 0, sizeof(*pgdat)); | ||
| 1989 | } | 1984 | } |
| 1990 | EXPORT_SYMBOL(try_offline_node); | 1985 | EXPORT_SYMBOL(try_offline_node); |
| 1991 | 1986 | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
| @@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); | |||
| 774 | 774 | ||
| 775 | importer->anon_vma = exporter->anon_vma; | 775 | importer->anon_vma = exporter->anon_vma; |
| 776 | error = anon_vma_clone(importer, exporter); | 776 | error = anon_vma_clone(importer, exporter); |
| 777 | if (error) { | 777 | if (error) |
| 778 | importer->anon_vma = NULL; | ||
| 779 | return error; | 778 | return error; |
| 780 | } | ||
| 781 | } | 779 | } |
| 782 | } | 780 | } |
| 783 | 781 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 44727811bf4c..88584838e704 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
| @@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 75 | oldpte = *pte; | 75 | oldpte = *pte; |
| 76 | if (pte_present(oldpte)) { | 76 | if (pte_present(oldpte)) { |
| 77 | pte_t ptent; | 77 | pte_t ptent; |
| 78 | bool preserve_write = prot_numa && pte_write(oldpte); | ||
| 78 | 79 | ||
| 79 | /* | 80 | /* |
| 80 | * Avoid trapping faults against the zero or KSM | 81 | * Avoid trapping faults against the zero or KSM |
| @@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
| 94 | 95 | ||
| 95 | ptent = ptep_modify_prot_start(mm, addr, pte); | 96 | ptent = ptep_modify_prot_start(mm, addr, pte); |
| 96 | ptent = pte_modify(ptent, newprot); | 97 | ptent = pte_modify(ptent, newprot); |
| 98 | if (preserve_write) | ||
| 99 | ptent = pte_mkwrite(ptent); | ||
| 97 | 100 | ||
| 98 | /* Avoid taking write faults for known dirty pages */ | 101 | /* Avoid taking write faults for known dirty pages */ |
| 99 | if (dirty_accountable && pte_dirty(ptent) && | 102 | if (dirty_accountable && pte_dirty(ptent) && |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab3..755a42c76eb4 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
| @@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) | |||
| 103 | 103 | ||
| 104 | if (!is_migrate_isolate_page(buddy)) { | 104 | if (!is_migrate_isolate_page(buddy)) { |
| 105 | __isolate_free_page(page, order); | 105 | __isolate_free_page(page, order); |
| 106 | kernel_map_pages(page, (1 << order), 1); | ||
| 106 | set_page_refcounted(page); | 107 | set_page_refcounted(page); |
| 107 | isolated_page = page; | 108 | isolated_page = page; |
| 108 | } | 109 | } |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f2878519..29f2f8b853ae 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
| @@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, | |||
| 265 | vma = vma->vm_next; | 265 | vma = vma->vm_next; |
| 266 | 266 | ||
| 267 | err = walk_page_test(start, next, walk); | 267 | err = walk_page_test(start, next, walk); |
| 268 | if (err > 0) | 268 | if (err > 0) { |
| 269 | /* | ||
| 270 | * positive return values are purely for | ||
| 271 | * controlling the pagewalk, so should never | ||
| 272 | * be passed to the callers. | ||
| 273 | */ | ||
| 274 | err = 0; | ||
| 269 | continue; | 275 | continue; |
| 276 | } | ||
| 270 | if (err < 0) | 277 | if (err < 0) |
| 271 | break; | 278 | break; |
| 272 | } | 279 | } |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
| @@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) | |||
| 287 | return 0; | 287 | return 0; |
| 288 | 288 | ||
| 289 | enomem_failure: | 289 | enomem_failure: |
| 290 | /* | ||
| 291 | * dst->anon_vma is dropped here otherwise its degree can be incorrectly | ||
| 292 | * decremented in unlink_anon_vmas(). | ||
| 293 | * We can safely do this because callers of anon_vma_clone() don't care | ||
| 294 | * about dst->anon_vma if anon_vma_clone() failed. | ||
| 295 | */ | ||
| 296 | dst->anon_vma = NULL; | ||
| 290 | unlink_anon_vmas(dst); | 297 | unlink_anon_vmas(dst); |
| 291 | return -ENOMEM; | 298 | return -ENOMEM; |
| 292 | } | 299 | } |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
| @@ -2449,7 +2449,8 @@ redo: | |||
| 2449 | do { | 2449 | do { |
| 2450 | tid = this_cpu_read(s->cpu_slab->tid); | 2450 | tid = this_cpu_read(s->cpu_slab->tid); |
| 2451 | c = raw_cpu_ptr(s->cpu_slab); | 2451 | c = raw_cpu_ptr(s->cpu_slab); |
| 2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
| 2453 | unlikely(tid != READ_ONCE(c->tid))); | ||
| 2453 | 2454 | ||
| 2454 | /* | 2455 | /* |
| 2455 | * Irqless object alloc/free algorithm used here depends on sequence | 2456 | * Irqless object alloc/free algorithm used here depends on sequence |
| @@ -2718,7 +2719,8 @@ redo: | |||
| 2718 | do { | 2719 | do { |
| 2719 | tid = this_cpu_read(s->cpu_slab->tid); | 2720 | tid = this_cpu_read(s->cpu_slab->tid); |
| 2720 | c = raw_cpu_ptr(s->cpu_slab); | 2721 | c = raw_cpu_ptr(s->cpu_slab); |
| 2721 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2722 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
| 2723 | unlikely(tid != READ_ONCE(c->tid))); | ||
| 2722 | 2724 | ||
| 2723 | /* Same with comment on barrier() in slab_alloc_node() */ | 2725 | /* Same with comment on barrier() in slab_alloc_node() */ |
| 2724 | barrier(); | 2726 | barrier(); |
