Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |  2
-rw-r--r--  mm/compaction.c      |  4
-rw-r--r--  mm/fremap.c          |  8
-rw-r--r--  mm/huge_memory.c     | 48
-rw-r--r--  mm/memcontrol.c      |  2
-rw-r--r--  mm/memory-failure.c  | 24
-rw-r--r--  mm/memory.c          |  2
-rw-r--r--  mm/mempolicy.c       | 16
-rw-r--r--  mm/migrate.c         | 82
-rw-r--r--  mm/mlock.c           | 44
-rw-r--r--  mm/mprotect.c        | 13
-rw-r--r--  mm/page_alloc.c      | 19
-rw-r--r--  mm/pgtable-generic.c |  8
-rw-r--r--  mm/rmap.c            |  4
14 files changed, 204 insertions, 72 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index eb69f352401d..723bbe04a0b0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -543,7 +543,7 @@ config ZSWAP
 
 config MEM_SOFT_DIRTY
 	bool "Track memory changes"
-	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY
+	depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
 	select PROC_PAGE_MONITOR
 	help
 	  This option enables memory changes tracking by introducing a
diff --git a/mm/compaction.c b/mm/compaction.c
index 805165bcd3dd..f58bcd016f43 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -134,6 +134,10 @@ static void update_pageblock_skip(struct compact_control *cc,
 			bool migrate_scanner)
 {
 	struct zone *zone = cc->zone;
+
+	if (cc->ignore_skip_hint)
+		return;
+
 	if (!page)
 		return;
 
diff --git a/mm/fremap.c b/mm/fremap.c
index 5bff08147768..bbc4d660221a 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -208,9 +208,10 @@ get_write_lock:
 		if (mapping_cap_account_dirty(mapping)) {
 			unsigned long addr;
 			struct file *file = get_file(vma->vm_file);
+			/* mmap_region may free vma; grab the info now */
+			vm_flags = vma->vm_flags;
 
-			addr = mmap_region(file, start, size,
-					vma->vm_flags, pgoff);
+			addr = mmap_region(file, start, size, vm_flags, pgoff);
 			fput(file);
 			if (IS_ERR_VALUE(addr)) {
 				err = addr;
@@ -218,7 +219,7 @@ get_write_lock:
 				BUG_ON(addr != start);
 				err = 0;
 			}
-			goto out;
+			goto out_freed;
 		}
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
@@ -253,6 +254,7 @@ get_write_lock:
 out:
 	if (vma)
 		vm_flags = vma->vm_flags;
+out_freed:
 	if (likely(!has_write_lock))
 		up_read(&mm->mmap_sem);
 	else
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33a5dc492810..95d1acb0f3d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -882,6 +882,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		ret = 0;
 		goto out_unlock;
 	}
+
 	if (unlikely(pmd_trans_splitting(pmd))) {
 		/* split huge page running from under us */
 		spin_unlock(src_ptl);
@@ -1153,7 +1154,7 @@ alloc:
 		new_page = NULL;
 
 	if (unlikely(!new_page)) {
-		if (is_huge_zero_pmd(orig_pmd)) {
+		if (!page) {
 			ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
 					address, pmd, orig_pmd, haddr);
 		} else {
@@ -1180,7 +1181,7 @@ alloc:
 
 	count_vm_event(THP_FAULT_ALLOC);
 
-	if (is_huge_zero_pmd(orig_pmd))
+	if (!page)
 		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
 	else
 		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
@@ -1206,7 +1207,7 @@ alloc:
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache_pmd(vma, address, pmd);
-		if (is_huge_zero_pmd(orig_pmd)) {
+		if (!page) {
 			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 			put_huge_zero_page();
 		} else {
@@ -1243,6 +1244,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
+	/* Full NUMA hinting faults to serialise migration in fault paths */
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto out;
+
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!PageHead(page));
 	if (flags & FOLL_TOUCH) {
@@ -1295,6 +1300,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
+	/*
+	 * If there are potential migrations, wait for completion and retry
+	 * without disrupting NUMA hinting information. Do not relock and
+	 * check_same as the page may no longer be mapped.
+	 */
+	if (unlikely(pmd_trans_migrating(*pmdp))) {
+		spin_unlock(ptl);
+		wait_migrate_huge_page(vma->anon_vma, pmdp);
+		goto out;
+	}
+
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
@@ -1323,23 +1339,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
+	}
 
-		/*
-		 * Otherwise wait for potential migrations and retry. We do
-		 * relock and check_same as the page may no longer be mapped.
-		 * As the fault is being retried, do not account for it.
-		 */
+	/* Migration could have started since the pmd_trans_migrating check */
+	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
-	/* Page is misplaced, serialise migrations and parallel THP splits */
+	/*
+	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+	 * to serialise splits
+	 */
 	get_page(page);
 	spin_unlock(ptl);
-	if (!page_locked)
-		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
@@ -1351,6 +1366,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_unlock;
 	}
 
+	/* Bail if we fail to protect against THP splits for any reason */
+	if (unlikely(!anon_vma)) {
+		put_page(page);
+		page_nid = -1;
+		goto clear_pmdnuma;
+	}
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and pmd_numa cleared.
@@ -1517,6 +1539,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		ret = 1;
 		if (!prot_numa) {
 			entry = pmdp_get_and_clear(mm, addr, pmd);
+			if (pmd_numa(entry))
+				entry = pmd_mknonnuma(entry);
 			entry = pmd_modify(entry, newprot);
 			ret = HPAGE_PMD_NR;
 			BUG_ON(pmd_write(entry));
@@ -1531,7 +1555,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			 */
 			if (!is_huge_zero_page(page) &&
 			    !pmd_numa(*pmd)) {
-				entry = pmdp_get_and_clear(mm, addr, pmd);
+				entry = *pmd;
 				entry = pmd_mknuma(entry);
 				ret = HPAGE_PMD_NR;
 			}
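
The do_huge_pmd_numa_page() hunks above treat a locked THP as a migration in flight: pmd_trans_migrating(), added to mm/migrate.c below, simply tests PageLocked() on the huge page, and wait_migrate_huge_page() waits for that lock to drop. The fault path's shape is wait-then-retry rather than relock-and-recheck. A minimal userspace sketch of that shape, with pthread mutexes standing in for the page table lock and the page lock; every name here is illustrative and none of it is kernel API:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;       /* "page table lock" */
static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER; /* "THP page lock" */
static bool migrating; /* set under ptl by a migrator that holds page_lock */

static void hinting_fault(void)
{
	for (;;) {
		pthread_mutex_lock(&ptl);
		if (!migrating) {
			/* ... handle the NUMA hinting fault under ptl ... */
			pthread_mutex_unlock(&ptl);
			return;
		}
		/* Drop ptl first, then wait for the migrator to unlock.
		 * The real wait_migrate_huge_page() only waits on the page
		 * lock, it never takes it. */
		pthread_mutex_unlock(&ptl);
		pthread_mutex_lock(&page_lock);
		pthread_mutex_unlock(&page_lock);
		/* retry the fault from the top */
	}
}

The waiter deliberately never touches the NUMA hinting state while a migration may be rewriting the PMD; it only drops its lock, waits, and retries.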
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bf5e89457149..7f1a356153c0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -338,7 +338,7 @@ struct mem_cgroup {
 static size_t memcg_size(void)
 {
 	return sizeof(struct mem_cgroup) +
-		nr_node_ids * sizeof(struct mem_cgroup_per_node);
+		nr_node_ids * sizeof(struct mem_cgroup_per_node *);
 }
 
 /* internal only representation about the status of kmem accounting. */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b7c171602ba1..fabe55046c1d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -938,6 +938,16 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 			BUG_ON(!PageHWPoison(p));
 			return SWAP_FAIL;
 		}
+		/*
+		 * We pinned the head page for hwpoison handling,
+		 * now we split the thp and we are interested in
+		 * the hwpoisoned raw page, so move the refcount
+		 * to it.
+		 */
+		if (hpage != p) {
+			put_page(hpage);
+			get_page(p);
+		}
 		/* THP is split, so ppage should be the real poisoned page. */
 		ppage = p;
 	}
@@ -1505,10 +1515,16 @@ static int soft_offline_huge_page(struct page *page, int flags)
 		if (ret > 0)
 			ret = -EIO;
 	} else {
-		set_page_hwpoison_huge_page(hpage);
-		dequeue_hwpoisoned_huge_page(hpage);
-		atomic_long_add(1 << compound_order(hpage),
-				&num_poisoned_pages);
+		/* overcommit hugetlb page will be freed to buddy */
+		if (PageHuge(page)) {
+			set_page_hwpoison_huge_page(hpage);
+			dequeue_hwpoisoned_huge_page(hpage);
+			atomic_long_add(1 << compound_order(hpage),
+					&num_poisoned_pages);
+		} else {
+			SetPageHWPoison(page);
+			atomic_long_inc(&num_poisoned_pages);
+		}
 	}
 	return ret;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 5d9025f3b3e1..6768ce9e57d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4271,7 +4271,7 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
-#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
+#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS
 bool ptlock_alloc(struct page *page)
 {
 	spinlock_t *ptl;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index eca4a3129129..0cd2c4d4e270 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1197,14 +1197,16 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
 			break;
 		vma = vma->vm_next;
 	}
+
+	if (PageHuge(page)) {
+		if (vma)
+			return alloc_huge_page_noerr(vma, address, 1);
+		else
+			return NULL;
+	}
 	/*
-	 * queue_pages_range() confirms that @page belongs to some vma,
-	 * so vma shouldn't be NULL.
+	 * if !vma, alloc_page_vma() will use task or system default policy
 	 */
-	BUG_ON(!vma);
-
-	if (PageHuge(page))
-		return alloc_huge_page_noerr(vma, address, 1);
 	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }
 #else
@@ -1318,7 +1320,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
-		putback_lru_pages(&pagelist);
+		putback_movable_pages(&pagelist);
 
 	up_write(&mm->mmap_sem);
 mpol_out:
diff --git a/mm/migrate.c b/mm/migrate.c
index bb940045fe85..9194375b2307 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
 #include <linux/balloon_compaction.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 
@@ -316,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  */
 int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, enum migrate_mode mode)
+		struct buffer_head *head, enum migrate_mode mode,
+		int extra_count)
 {
-	int expected_count = 0;
+	int expected_count = 1 + extra_count;
 	void **pslot;
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
-		if (page_count(page) != 1)
+		if (page_count(page) != expected_count)
 			return -EAGAIN;
 		return MIGRATEPAGE_SUCCESS;
 	}
@@ -333,7 +335,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));
 
-	expected_count = 2 + page_has_private(page);
+	expected_count += 1 + page_has_private(page);
 	if (page_count(page) != expected_count ||
 		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
@@ -583,7 +585,7 @@ int migrate_page(struct address_space *mapping,
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -610,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping,
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -1654,6 +1656,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	return 1;
 }
 
+bool pmd_trans_migrating(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	return PageLocked(page);
+}
+
+void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+	struct page *page = pmd_page(*pmd);
+	wait_on_page_locked(page);
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
@@ -1716,12 +1730,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		struct page *page, int node)
 {
 	spinlock_t *ptl;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
+	unsigned long mmun_start = address & HPAGE_PMD_MASK;
+	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1744,6 +1760,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_fail;
 	}
 
+	if (mm_tlb_flush_pending(mm))
+		flush_tlb_range(vma, mmun_start, mmun_end);
+
 	/* Prepare a page as a migration target */
 	__set_page_locked(new_page);
 	SetPageSwapBacked(new_page);
@@ -1755,9 +1774,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	WARN_ON(PageLRU(new_page));
 
 	/* Recheck the target PMD */
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry))) {
+	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
 		spin_unlock(ptl);
+		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 		/* Reverse changes made by migrate_page_copy() */
 		if (TestClearPageActive(new_page))
@@ -1774,7 +1796,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		putback_lru_page(page);
 		mod_zone_page_state(page_zone(page),
 			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
-		goto out_fail;
+
+		goto out_unlock;
 	}
 
 	/*
@@ -1786,16 +1809,35 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mknonnuma(entry);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-	pmdp_clear_flush(vma, haddr, pmd);
-	set_pmd_at(mm, haddr, pmd, entry);
-	page_add_new_anon_rmap(new_page, vma, haddr);
+	/*
+	 * Clear the old entry under pagetable lock and establish the new PTE.
+	 * Any parallel GUP will either observe the old page blocking on the
+	 * page lock, block on the page table lock or observe the new page.
+	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
+	 * guarantee the copy is visible before the pagetable update.
+	 */
+	flush_cache_range(vma, mmun_start, mmun_end);
+	page_add_new_anon_rmap(new_page, vma, mmun_start);
+	pmdp_clear_flush(vma, mmun_start, pmd);
+	set_pmd_at(mm, mmun_start, pmd, entry);
+	flush_tlb_range(vma, mmun_start, mmun_end);
 	update_mmu_cache_pmd(vma, address, &entry);
+
+	if (page_count(page) != 2) {
+		set_pmd_at(mm, mmun_start, pmd, orig_entry);
+		flush_tlb_range(vma, mmun_start, mmun_end);
+		update_mmu_cache_pmd(vma, address, &entry);
+		page_remove_rmap(new_page);
+		goto fail_putback;
+	}
+
 	page_remove_rmap(page);
+
 	/*
 	 * Finish the charge transaction under the page table lock to
 	 * prevent split_huge_page() from dividing up the charge
@@ -1803,6 +1845,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_end_migration(memcg, page, new_page, true);
 	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	unlock_page(new_page);
 	unlock_page(page);
@@ -1820,10 +1863,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
-	entry = pmd_mknonnuma(entry);
-	set_pmd_at(mm, haddr, pmd, entry);
-	update_mmu_cache_pmd(vma, address, &entry);
+	ptl = pmd_lock(mm, pmd);
+	if (pmd_same(*pmd, entry)) {
+		entry = pmd_mknonnuma(entry);
+		set_pmd_at(mm, mmun_start, pmd, entry);
+		update_mmu_cache_pmd(vma, address, &entry);
+	}
+	spin_unlock(ptl);
 
+out_unlock:
 	unlock_page(page);
 	put_page(page);
 	return 0;
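
For reference, the reworked reference-count check in migrate_page_move_mapping() composes as follows. This is a standalone sketch of just the arithmetic; expected_refs() is an illustrative helper, not a kernel function, and both callers in this diff pass extra_count = 0 (the parameter exists so a caller can account for references it holds itself):

#include <stdbool.h>

static int expected_refs(bool has_mapping, bool has_private, int extra_count)
{
	int expected_count = 1 + extra_count;	/* the isolation reference */

	/* A mapped file page also holds one reference for the radix tree
	 * slot, plus one more if buffers are attached. */
	if (has_mapping)
		expected_count += 1 + (has_private ? 1 : 0);
	return expected_count;
}

/*
 * expected_refs(false, false, 0) == 1: anonymous page, as before.
 * expected_refs(true, true, 0)  == 3: matches the old hard-coded
 * "2 + page_has_private(page)" for a file page with buffers.
 */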
diff --git a/mm/mlock.c b/mm/mlock.c
index d480cd6fc475..192e6eebe4f2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page)
 
 /**
  * munlock_vma_page - munlock a vma page
- * @page - page to be unlocked
+ * @page - page to be unlocked, either a normal page or THP page head
+ *
+ * returns the size of the page as a page mask (0 for normal page,
+ * HPAGE_PMD_NR - 1 for THP head page)
  *
 * called from munlock()/munmap() path with page supposedly on the LRU.
 * When we munlock a page, because the vma where we found the page is being
@@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page)
 */
 unsigned int munlock_vma_page(struct page *page)
 {
-	unsigned int page_mask = 0;
+	unsigned int nr_pages;
 
 	BUG_ON(!PageLocked(page));
 
 	if (TestClearPageMlocked(page)) {
-		unsigned int nr_pages = hpage_nr_pages(page);
+		nr_pages = hpage_nr_pages(page);
 		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-		page_mask = nr_pages - 1;
 		if (!isolate_lru_page(page))
 			__munlock_isolated_page(page);
 		else
 			__munlock_isolation_failed(page);
+	} else {
+		nr_pages = hpage_nr_pages(page);
 	}
 
-	return page_mask;
+	/*
+	 * Regardless of the original PageMlocked flag, we determine nr_pages
+	 * after touching the flag. This leaves a possible race with a THP page
+	 * split, such that a whole THP page was munlocked, but nr_pages == 1.
+	 * Returning a smaller mask due to that is OK, the worst that can
+	 * happen is subsequent useless scanning of the former tail pages.
+	 * The NR_MLOCK accounting can however become broken.
+	 */
+	return nr_pages - 1;
 }
 
 /**
@@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
 	int i;
 	int nr = pagevec_count(pvec);
-	int delta_munlocked = -nr;
+	int delta_munlocked;
 	struct pagevec pvec_putback;
 	int pgrescued = 0;
 
+	pagevec_init(&pvec_putback, 0);
+
 	/* Phase 1: page isolation */
 	spin_lock_irq(&zone->lru_lock);
 	for (i = 0; i < nr; i++) {
@@ -318,18 +332,21 @@ skip_munlock:
 			/*
 			 * We won't be munlocking this page in the next phase
 			 * but we still need to release the follow_page_mask()
-			 * pin.
+			 * pin. We cannot do it under lru_lock however. If it's
+			 * the last pin, __page_cache_release would deadlock.
 			 */
+			pagevec_add(&pvec_putback, pvec->pages[i]);
 			pvec->pages[i] = NULL;
-			put_page(page);
-			delta_munlocked++;
 		}
 	}
+	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(&zone->lru_lock);
 
+	/* Now we can release pins of pages that we are not munlocking */
+	pagevec_release(&pvec_putback);
+
 	/* Phase 2: page munlock */
-	pagevec_init(&pvec_putback, 0);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pvec->pages[i];
 
@@ -440,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 
 	while (start < end) {
 		struct page *page = NULL;
-		unsigned int page_mask, page_increm;
+		unsigned int page_mask;
+		unsigned long page_increm;
 		struct pagevec pvec;
 		struct zone *zone;
 		int zoneid;
@@ -490,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				goto next;
 			}
 		}
-		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+		/* It's a bug to munlock in the middle of a THP page */
+		VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
+		page_increm = 1 + page_mask;
 		start += page_increm * PAGE_SIZE;
 next:
 		cond_resched();
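
The munlock_vma_page() return value and the caller's new stride in munlock_vma_pages_range() fit together as below; a small sketch with illustrative x86-64 constants, not kernel code:

#include <assert.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR	512	/* pages per 2MB THP */

/* munlock_vma_page() now always returns nr_pages - 1: 0 for a base
 * page, HPAGE_PMD_NR - 1 for a THP head page. */
static unsigned long advance(unsigned long start, unsigned int page_mask)
{
	/* It's a bug to munlock in the middle of a THP page */
	assert(((start >> PAGE_SHIFT) & page_mask) == 0);
	return start + (1UL + page_mask) * PAGE_SIZE;
}

The old expression, 1 + (~(start >> PAGE_SHIFT) & page_mask), produced a partial stride for a start address inside a THP; the new code asserts that this can no longer happen and always steps over a whole base page or a whole THP.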
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 26667971c824..bb53a6591aea 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -52,17 +52,21 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			pte_t ptent;
 			bool updated = false;
 
-			ptent = ptep_modify_prot_start(mm, addr, pte);
 			if (!prot_numa) {
+				ptent = ptep_modify_prot_start(mm, addr, pte);
+				if (pte_numa(ptent))
+					ptent = pte_mknonnuma(ptent);
 				ptent = pte_modify(ptent, newprot);
 				updated = true;
 			} else {
 				struct page *page;
 
+				ptent = *pte;
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
 					if (!pte_numa(oldpte)) {
 						ptent = pte_mknuma(ptent);
+						set_pte_at(mm, addr, pte, ptent);
 						updated = true;
 					}
 				}
@@ -79,7 +83,10 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			if (updated)
 				pages++;
-			ptep_modify_prot_commit(mm, addr, pte, ptent);
+
+			/* Only !prot_numa always clears the pte */
+			if (!prot_numa)
+				ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
@@ -181,6 +188,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
+	set_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -192,6 +200,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
+	clear_tlb_flush_pending(mm);
 
 	return pages;
 }
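
The set_tlb_flush_pending()/clear_tlb_flush_pending() bracket added here is what migrate_misplaced_transhuge_page() in mm/migrate.c above consumes via mm_tlb_flush_pending(). A hedged userspace model of the handshake, with a C11 atomic standing in for the real mm_struct field and its barriers; all names are illustrative:

#include <stdatomic.h>
#include <stdbool.h>

struct mm_model {
	atomic_bool tlb_flush_pending;	/* stand-in for mm->tlb_flush_pending */
};

static void model_flush_tlb_range(struct mm_model *mm) { (void)mm; /* no-op stand-in */ }

/* Writer side: the mprotect-style path marks the window in which PTEs
 * have been rewritten but stale TLB entries may still exist. */
static void model_change_protection(struct mm_model *mm)
{
	atomic_store(&mm->tlb_flush_pending, true);
	/* ... clear and rewrite page table entries ... */
	model_flush_tlb_range(mm);	/* only if anything actually changed */
	atomic_store(&mm->tlb_flush_pending, false);
}

/* Reader side: migration must not copy the page while a stale, still
 * writable TLB entry could be modifying it, so it flushes first. */
static void model_migrate_page(struct mm_model *mm)
{
	if (atomic_load(&mm->tlb_flush_pending))
		model_flush_tlb_range(mm);
	/* ... safe to isolate and copy the page now ... */
}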
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 580a5f075ed0..5248fe070aa4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1816,7 +1816,7 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 
 static bool zone_local(struct zone *local_zone, struct zone *zone)
 {
-	return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE;
+	return local_zone->node == zone->node;
 }
 
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
@@ -1913,18 +1913,17 @@ zonelist_scan:
 			 * page was allocated in should have no effect on the
 			 * time the page has in memory before being reclaimed.
 			 *
-			 * When zone_reclaim_mode is enabled, try to stay in
-			 * local zones in the fastpath. If that fails, the
-			 * slowpath is entered, which will do another pass
-			 * starting with the local zones, but ultimately fall
-			 * back to remote zones that do not partake in the
-			 * fairness round-robin cycle of this zonelist.
+			 * Try to stay in local zones in the fastpath. If
+			 * that fails, the slowpath is entered, which will do
+			 * another pass starting with the local zones, but
+			 * ultimately fall back to remote zones that do not
+			 * partake in the fairness round-robin cycle of this
+			 * zonelist.
 			 */
 			if (alloc_flags & ALLOC_WMARK_LOW) {
 				if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
 					continue;
-				if (zone_reclaim_mode &&
-				    !zone_local(preferred_zone, zone))
+				if (!zone_local(preferred_zone, zone))
 					continue;
 			}
 			/*
@@ -2390,7 +2389,7 @@ static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
 		 * thrash fairness information for zones that are not
 		 * actually part of this zonelist's round-robin cycle.
 		 */
-		if (zone_reclaim_mode && !zone_local(preferred_zone, zone))
+		if (!zone_local(preferred_zone, zone))
 			continue;
 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
 				    high_wmark_pages(zone) -
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index cbb38545d9d6..a8b919925934 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -110,9 +110,10 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
 pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
 		       pte_t *ptep)
 {
+	struct mm_struct *mm = (vma)->vm_mm;
 	pte_t pte;
-	pte = ptep_get_and_clear((vma)->vm_mm, address, ptep);
-	if (pte_accessible(pte))
+	pte = ptep_get_and_clear(mm, address, ptep);
+	if (pte_accessible(mm, pte))
 		flush_tlb_page(vma, address);
 	return pte;
 }
@@ -191,6 +192,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
+	pmd_t entry = *pmdp;
+	if (pmd_numa(entry))
+		entry = pmd_mknonnuma(entry);
 	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp));
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -600,7 +600,11 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
 	spinlock_t *ptl;
 
 	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
 		pte = huge_pte_offset(mm, address);
+		if (!pte)
+			return NULL;
+
 		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
 		goto check;
 	}