diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-25 19:21:17 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-25 19:21:17 -0400 |
commit | 9c8e30d12d46461c283608d3123debc4e75d4f8b (patch) | |
tree | c354074a091c9bacc700a2dc861085da8a7e0385 /mm | |
parent | b8517e98305e3c76fa293133826afe39a690edcd (diff) | |
parent | b7b04004ecd9e58cdc6c6ff92f251d5ac5c0adb2 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"15 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm: numa: mark huge PTEs young when clearing NUMA hinting faults
mm: numa: slow PTE scan rate if migration failures occur
mm: numa: preserve PTE write permissions across a NUMA hinting fault
mm: numa: group related processes based on VMA flags instead of page table flags
hfsplus: fix B-tree corruption after insertion at position 0
MAINTAINERS: add Jan as DMI/SMBIOS support maintainer
fs/affs/file.c: unlock/release page on error
mm/page_alloc.c: call kernel_map_pages in unset_migratetype_isolate
mm/slub: fix lockups on PREEMPT && !SMP kernels
mm/memory hotplug: postpone the reset of obsolete pgdat
MAINTAINERS: correct rtc armada38x pattern entry
mm/pagewalk.c: prevent positive return value of walk_page_test() from being passed to callers
mm: fix anon_vma->degree underflow in anon_vma endless growing prevention
drivers/rtc/rtc-mrst: fix suspend/resume
aoe: update aoe maintainer information
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 26 | ||||
-rw-r--r-- | mm/memory.c | 22 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 13 | ||||
-rw-r--r-- | mm/mmap.c | 4 | ||||
-rw-r--r-- | mm/mprotect.c | 3 | ||||
-rw-r--r-- | mm/page_isolation.c | 1 | ||||
-rw-r--r-- | mm/pagewalk.c | 9 | ||||
-rw-r--r-- | mm/rmap.c | 7 | ||||
-rw-r--r-- | mm/slub.c | 6 |
9 files changed, 53 insertions, 38 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 626e93db28ba..6817b0350c71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1260 | int target_nid, last_cpupid = -1; | 1260 | int target_nid, last_cpupid = -1; |
1261 | bool page_locked; | 1261 | bool page_locked; |
1262 | bool migrated = false; | 1262 | bool migrated = false; |
1263 | bool was_writable; | ||
1263 | int flags = 0; | 1264 | int flags = 0; |
1264 | 1265 | ||
1265 | /* A PROT_NONE fault should not end up here */ | 1266 | /* A PROT_NONE fault should not end up here */ |
@@ -1291,17 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1291 | flags |= TNF_FAULT_LOCAL; | 1292 | flags |= TNF_FAULT_LOCAL; |
1292 | } | 1293 | } |
1293 | 1294 | ||
1294 | /* | 1295 | /* See similar comment in do_numa_page for explanation */ |
1295 | * Avoid grouping on DSO/COW pages in specific and RO pages | 1296 | if (!(vma->vm_flags & VM_WRITE)) |
1296 | * in general, RO pages shouldn't hurt as much anyway since | ||
1297 | * they can be in shared cache state. | ||
1298 | * | ||
1299 | * FIXME! This checks "pmd_dirty()" as an approximation of | ||
1300 | * "is this a read-only page", since checking "pmd_write()" | ||
1301 | * is even more broken. We haven't actually turned this into | ||
1302 | * a writable page, so pmd_write() will always be false. | ||
1303 | */ | ||
1304 | if (!pmd_dirty(pmd)) | ||
1305 | flags |= TNF_NO_GROUP; | 1297 | flags |= TNF_NO_GROUP; |
1306 | 1298 | ||
1307 | /* | 1299 | /* |
@@ -1358,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1358 | if (migrated) { | 1350 | if (migrated) { |
1359 | flags |= TNF_MIGRATED; | 1351 | flags |= TNF_MIGRATED; |
1360 | page_nid = target_nid; | 1352 | page_nid = target_nid; |
1361 | } | 1353 | } else |
1354 | flags |= TNF_MIGRATE_FAIL; | ||
1362 | 1355 | ||
1363 | goto out; | 1356 | goto out; |
1364 | clear_pmdnuma: | 1357 | clear_pmdnuma: |
1365 | BUG_ON(!PageLocked(page)); | 1358 | BUG_ON(!PageLocked(page)); |
1359 | was_writable = pmd_write(pmd); | ||
1366 | pmd = pmd_modify(pmd, vma->vm_page_prot); | 1360 | pmd = pmd_modify(pmd, vma->vm_page_prot); |
1361 | pmd = pmd_mkyoung(pmd); | ||
1362 | if (was_writable) | ||
1363 | pmd = pmd_mkwrite(pmd); | ||
1367 | set_pmd_at(mm, haddr, pmdp, pmd); | 1364 | set_pmd_at(mm, haddr, pmdp, pmd); |
1368 | update_mmu_cache_pmd(vma, addr, pmdp); | 1365 | update_mmu_cache_pmd(vma, addr, pmdp); |
1369 | unlock_page(page); | 1366 | unlock_page(page); |
@@ -1487,6 +1484,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1487 | 1484 | ||
1488 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { | 1485 | if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { |
1489 | pmd_t entry; | 1486 | pmd_t entry; |
1487 | bool preserve_write = prot_numa && pmd_write(*pmd); | ||
1490 | ret = 1; | 1488 | ret = 1; |
1491 | 1489 | ||
1492 | /* | 1490 | /* |
@@ -1502,9 +1500,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1502 | if (!prot_numa || !pmd_protnone(*pmd)) { | 1500 | if (!prot_numa || !pmd_protnone(*pmd)) { |
1503 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); | 1501 | entry = pmdp_get_and_clear_notify(mm, addr, pmd); |
1504 | entry = pmd_modify(entry, newprot); | 1502 | entry = pmd_modify(entry, newprot); |
1503 | if (preserve_write) | ||
1504 | entry = pmd_mkwrite(entry); | ||
1505 | ret = HPAGE_PMD_NR; | 1505 | ret = HPAGE_PMD_NR; |
1506 | set_pmd_at(mm, addr, pmd, entry); | 1506 | set_pmd_at(mm, addr, pmd, entry); |
1507 | BUG_ON(pmd_write(entry)); | 1507 | BUG_ON(!preserve_write && pmd_write(entry)); |
1508 | } | 1508 | } |
1509 | spin_unlock(ptl); | 1509 | spin_unlock(ptl); |
1510 | } | 1510 | } |
diff --git a/mm/memory.c b/mm/memory.c index 411144f977b1..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3035 | int last_cpupid; | 3035 | int last_cpupid; |
3036 | int target_nid; | 3036 | int target_nid; |
3037 | bool migrated = false; | 3037 | bool migrated = false; |
3038 | bool was_writable = pte_write(pte); | ||
3038 | int flags = 0; | 3039 | int flags = 0; |
3039 | 3040 | ||
3040 | /* A PROT_NONE fault should not end up here */ | 3041 | /* A PROT_NONE fault should not end up here */ |
@@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3059 | /* Make it present again */ | 3060 | /* Make it present again */ |
3060 | pte = pte_modify(pte, vma->vm_page_prot); | 3061 | pte = pte_modify(pte, vma->vm_page_prot); |
3061 | pte = pte_mkyoung(pte); | 3062 | pte = pte_mkyoung(pte); |
3063 | if (was_writable) | ||
3064 | pte = pte_mkwrite(pte); | ||
3062 | set_pte_at(mm, addr, ptep, pte); | 3065 | set_pte_at(mm, addr, ptep, pte); |
3063 | update_mmu_cache(vma, addr, ptep); | 3066 | update_mmu_cache(vma, addr, ptep); |
3064 | 3067 | ||
@@ -3069,16 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3069 | } | 3072 | } |
3070 | 3073 | ||
3071 | /* | 3074 | /* |
3072 | * Avoid grouping on DSO/COW pages in specific and RO pages | 3075 | * Avoid grouping on RO pages in general. RO pages shouldn't hurt as |
3073 | * in general, RO pages shouldn't hurt as much anyway since | 3076 | * much anyway since they can be in shared cache state. This misses |
3074 | * they can be in shared cache state. | 3077 | * the case where a mapping is writable but the process never writes |
3075 | * | 3078 | * to it but pte_write gets cleared during protection updates and |
3076 | * FIXME! This checks "pmd_dirty()" as an approximation of | 3079 | * pte_dirty has unpredictable behaviour between PTE scan updates, |
3077 | * "is this a read-only page", since checking "pmd_write()" | 3080 | * background writeback, dirty balancing and application behaviour. |
3078 | * is even more broken. We haven't actually turned this into | ||
3079 | * a writable page, so pmd_write() will always be false. | ||
3080 | */ | 3081 | */ |
3081 | if (!pte_dirty(pte)) | 3082 | if (!(vma->vm_flags & VM_WRITE)) |
3082 | flags |= TNF_NO_GROUP; | 3083 | flags |= TNF_NO_GROUP; |
3083 | 3084 | ||
3084 | /* | 3085 | /* |
@@ -3102,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3102 | if (migrated) { | 3103 | if (migrated) { |
3103 | page_nid = target_nid; | 3104 | page_nid = target_nid; |
3104 | flags |= TNF_MIGRATED; | 3105 | flags |= TNF_MIGRATED; |
3105 | } | 3106 | } else |
3107 | flags |= TNF_MIGRATE_FAIL; | ||
3106 | 3108 | ||
3107 | out: | 3109 | out: |
3108 | if (page_nid != -1) | 3110 | if (page_nid != -1) |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795bea..65842d688b7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | |||
1092 | return NULL; | 1092 | return NULL; |
1093 | 1093 | ||
1094 | arch_refresh_nodedata(nid, pgdat); | 1094 | arch_refresh_nodedata(nid, pgdat); |
1095 | } else { | ||
1096 | /* Reset the nr_zones and classzone_idx to 0 before reuse */ | ||
1097 | pgdat->nr_zones = 0; | ||
1098 | pgdat->classzone_idx = 0; | ||
1095 | } | 1099 | } |
1096 | 1100 | ||
1097 | /* we can use NODE_DATA(nid) from here */ | 1101 | /* we can use NODE_DATA(nid) from here */ |
@@ -1977,15 +1981,6 @@ void try_offline_node(int nid) | |||
1977 | if (is_vmalloc_addr(zone->wait_table)) | 1981 | if (is_vmalloc_addr(zone->wait_table)) |
1978 | vfree(zone->wait_table); | 1982 | vfree(zone->wait_table); |
1979 | } | 1983 | } |
1980 | |||
1981 | /* | ||
1982 | * Since there is no way to guarentee the address of pgdat/zone is not | ||
1983 | * on stack of any kernel threads or used by other kernel objects | ||
1984 | * without reference counting or other symchronizing method, do not | ||
1985 | * reset node_data and free pgdat here. Just reset it to 0 and reuse | ||
1986 | * the memory when the node is online again. | ||
1987 | */ | ||
1988 | memset(pgdat, 0, sizeof(*pgdat)); | ||
1989 | } | 1984 | } |
1990 | EXPORT_SYMBOL(try_offline_node); | 1985 | EXPORT_SYMBOL(try_offline_node); |
1991 | 1986 | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); | |||
774 | 774 | ||
775 | importer->anon_vma = exporter->anon_vma; | 775 | importer->anon_vma = exporter->anon_vma; |
776 | error = anon_vma_clone(importer, exporter); | 776 | error = anon_vma_clone(importer, exporter); |
777 | if (error) { | 777 | if (error) |
778 | importer->anon_vma = NULL; | ||
779 | return error; | 778 | return error; |
780 | } | ||
781 | } | 779 | } |
782 | } | 780 | } |
783 | 781 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 44727811bf4c..88584838e704 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
75 | oldpte = *pte; | 75 | oldpte = *pte; |
76 | if (pte_present(oldpte)) { | 76 | if (pte_present(oldpte)) { |
77 | pte_t ptent; | 77 | pte_t ptent; |
78 | bool preserve_write = prot_numa && pte_write(oldpte); | ||
78 | 79 | ||
79 | /* | 80 | /* |
80 | * Avoid trapping faults against the zero or KSM | 81 | * Avoid trapping faults against the zero or KSM |
@@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
94 | 95 | ||
95 | ptent = ptep_modify_prot_start(mm, addr, pte); | 96 | ptent = ptep_modify_prot_start(mm, addr, pte); |
96 | ptent = pte_modify(ptent, newprot); | 97 | ptent = pte_modify(ptent, newprot); |
98 | if (preserve_write) | ||
99 | ptent = pte_mkwrite(ptent); | ||
97 | 100 | ||
98 | /* Avoid taking write faults for known dirty pages */ | 101 | /* Avoid taking write faults for known dirty pages */ |
99 | if (dirty_accountable && pte_dirty(ptent) && | 102 | if (dirty_accountable && pte_dirty(ptent) && |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab3..755a42c76eb4 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) | |||
103 | 103 | ||
104 | if (!is_migrate_isolate_page(buddy)) { | 104 | if (!is_migrate_isolate_page(buddy)) { |
105 | __isolate_free_page(page, order); | 105 | __isolate_free_page(page, order); |
106 | kernel_map_pages(page, (1 << order), 1); | ||
106 | set_page_refcounted(page); | 107 | set_page_refcounted(page); |
107 | isolated_page = page; | 108 | isolated_page = page; |
108 | } | 109 | } |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f2878519..29f2f8b853ae 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, | |||
265 | vma = vma->vm_next; | 265 | vma = vma->vm_next; |
266 | 266 | ||
267 | err = walk_page_test(start, next, walk); | 267 | err = walk_page_test(start, next, walk); |
268 | if (err > 0) | 268 | if (err > 0) { |
269 | /* | ||
270 | * positive return values are purely for | ||
271 | * controlling the pagewalk, so should never | ||
272 | * be passed to the callers. | ||
273 | */ | ||
274 | err = 0; | ||
269 | continue; | 275 | continue; |
276 | } | ||
270 | if (err < 0) | 277 | if (err < 0) |
271 | break; | 278 | break; |
272 | } | 279 | } |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) | |||
287 | return 0; | 287 | return 0; |
288 | 288 | ||
289 | enomem_failure: | 289 | enomem_failure: |
290 | /* | ||
291 | * dst->anon_vma is dropped here otherwise its degree can be incorrectly | ||
292 | * decremented in unlink_anon_vmas(). | ||
293 | * We can safely do this because callers of anon_vma_clone() don't care | ||
294 | * about dst->anon_vma if anon_vma_clone() failed. | ||
295 | */ | ||
296 | dst->anon_vma = NULL; | ||
290 | unlink_anon_vmas(dst); | 297 | unlink_anon_vmas(dst); |
291 | return -ENOMEM; | 298 | return -ENOMEM; |
292 | } | 299 | } |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -2449,7 +2449,8 @@ redo: | |||
2449 | do { | 2449 | do { |
2450 | tid = this_cpu_read(s->cpu_slab->tid); | 2450 | tid = this_cpu_read(s->cpu_slab->tid); |
2451 | c = raw_cpu_ptr(s->cpu_slab); | 2451 | c = raw_cpu_ptr(s->cpu_slab); |
2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2452 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
2453 | unlikely(tid != READ_ONCE(c->tid))); | ||
2453 | 2454 | ||
2454 | /* | 2455 | /* |
2455 | * Irqless object alloc/free algorithm used here depends on sequence | 2456 | * Irqless object alloc/free algorithm used here depends on sequence |
@@ -2718,7 +2719,8 @@ redo: | |||
2718 | do { | 2719 | do { |
2719 | tid = this_cpu_read(s->cpu_slab->tid); | 2720 | tid = this_cpu_read(s->cpu_slab->tid); |
2720 | c = raw_cpu_ptr(s->cpu_slab); | 2721 | c = raw_cpu_ptr(s->cpu_slab); |
2721 | } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); | 2722 | } while (IS_ENABLED(CONFIG_PREEMPT) && |
2723 | unlikely(tid != READ_ONCE(c->tid))); | ||
2722 | 2724 | ||
2723 | /* Same with comment on barrier() in slab_alloc_node() */ | 2725 | /* Same with comment on barrier() in slab_alloc_node() */ |
2724 | barrier(); | 2726 | barrier(); |