Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c | 582
1 file changed, 278 insertions(+), 304 deletions(-)
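
Note on the conversion: every hunk below replaces the old per-argument fault interface (mm, vma, address, page_table, pmd, ptl, flags) with a single struct fault_env pointer. The structure itself is defined outside mm/memory.c and is not part of this diff; the sketch below is only inferred from the fields the new code dereferences (fe->vma, fe->address, fe->flags, fe->pmd, fe->pte, fe->ptl) and is not the authoritative definition:

	/* Sketch only: fields inferred from their use in the hunks below. */
	struct fault_env {
		struct vm_area_struct *vma;	/* target VMA, replaces the old vma argument */
		unsigned long address;		/* faulting virtual address */
		unsigned int flags;		/* FAULT_FLAG_xxx */
		pmd_t *pmd;			/* pmd entry covering address */
		pte_t *pte;			/* pte, mapped and possibly locked */
		spinlock_t *ptl;		/* page table lock protecting pte */
	};

__handle_mm_fault() (see the last hunks) builds this on the stack and hands one pointer down through handle_pte_fault(), do_wp_page(), do_swap_page(), do_fault() and the fault-around path, so helpers can update fe->pte and fe->ptl in place instead of threading them back through extra parameters.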
diff --git a/mm/memory.c b/mm/memory.c
index 6bf2b8564376..72b520897339 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2070,13 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
2070 * case, all we need to do here is to mark the page as writable and update 2070 * case, all we need to do here is to mark the page as writable and update
2071 * any related book-keeping. 2071 * any related book-keeping.
2072 */ 2072 */
2073static inline int wp_page_reuse(struct mm_struct *mm, 2073static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,
2074 struct vm_area_struct *vma, unsigned long address, 2074 struct page *page, int page_mkwrite, int dirty_shared)
2075 pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, 2075 __releases(fe->ptl)
2076 struct page *page, int page_mkwrite,
2077 int dirty_shared)
2078 __releases(ptl)
2079{ 2076{
2077 struct vm_area_struct *vma = fe->vma;
2080 pte_t entry; 2078 pte_t entry;
2081 /* 2079 /*
2082 * Clear the pages cpupid information as the existing 2080 * Clear the pages cpupid information as the existing
@@ -2086,12 +2084,12 @@ static inline int wp_page_reuse(struct mm_struct *mm,
2086 if (page) 2084 if (page)
2087 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); 2085 page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
2088 2086
2089 flush_cache_page(vma, address, pte_pfn(orig_pte)); 2087 flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
2090 entry = pte_mkyoung(orig_pte); 2088 entry = pte_mkyoung(orig_pte);
2091 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2089 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2092 if (ptep_set_access_flags(vma, address, page_table, entry, 1)) 2090 if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1))
2093 update_mmu_cache(vma, address, page_table); 2091 update_mmu_cache(vma, fe->address, fe->pte);
2094 pte_unmap_unlock(page_table, ptl); 2092 pte_unmap_unlock(fe->pte, fe->ptl);
2095 2093
2096 if (dirty_shared) { 2094 if (dirty_shared) {
2097 struct address_space *mapping; 2095 struct address_space *mapping;
@@ -2137,30 +2135,31 @@ static inline int wp_page_reuse(struct mm_struct *mm,
2137 * held to the old page, as well as updating the rmap. 2135 * held to the old page, as well as updating the rmap.
2138 * - In any case, unlock the PTL and drop the reference we took to the old page. 2136 * - In any case, unlock the PTL and drop the reference we took to the old page.
2139 */ 2137 */
2140static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, 2138static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
2141 unsigned long address, pte_t *page_table, pmd_t *pmd, 2139 struct page *old_page)
2142 pte_t orig_pte, struct page *old_page)
2143{ 2140{
2141 struct vm_area_struct *vma = fe->vma;
2142 struct mm_struct *mm = vma->vm_mm;
2144 struct page *new_page = NULL; 2143 struct page *new_page = NULL;
2145 spinlock_t *ptl = NULL;
2146 pte_t entry; 2144 pte_t entry;
2147 int page_copied = 0; 2145 int page_copied = 0;
2148 const unsigned long mmun_start = address & PAGE_MASK; /* For mmu_notifiers */ 2146 const unsigned long mmun_start = fe->address & PAGE_MASK;
2149 const unsigned long mmun_end = mmun_start + PAGE_SIZE; /* For mmu_notifiers */ 2147 const unsigned long mmun_end = mmun_start + PAGE_SIZE;
2150 struct mem_cgroup *memcg; 2148 struct mem_cgroup *memcg;
2151 2149
2152 if (unlikely(anon_vma_prepare(vma))) 2150 if (unlikely(anon_vma_prepare(vma)))
2153 goto oom; 2151 goto oom;
2154 2152
2155 if (is_zero_pfn(pte_pfn(orig_pte))) { 2153 if (is_zero_pfn(pte_pfn(orig_pte))) {
2156 new_page = alloc_zeroed_user_highpage_movable(vma, address); 2154 new_page = alloc_zeroed_user_highpage_movable(vma, fe->address);
2157 if (!new_page) 2155 if (!new_page)
2158 goto oom; 2156 goto oom;
2159 } else { 2157 } else {
2160 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 2158 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
2159 fe->address);
2161 if (!new_page) 2160 if (!new_page)
2162 goto oom; 2161 goto oom;
2163 cow_user_page(new_page, old_page, address, vma); 2162 cow_user_page(new_page, old_page, fe->address, vma);
2164 } 2163 }
2165 2164
2166 if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) 2165 if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
@@ -2173,8 +2172,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
2173 /* 2172 /*
2174 * Re-check the pte - we dropped the lock 2173 * Re-check the pte - we dropped the lock
2175 */ 2174 */
2176 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2175 fe->pte = pte_offset_map_lock(mm, fe->pmd, fe->address, &fe->ptl);
2177 if (likely(pte_same(*page_table, orig_pte))) { 2176 if (likely(pte_same(*fe->pte, orig_pte))) {
2178 if (old_page) { 2177 if (old_page) {
2179 if (!PageAnon(old_page)) { 2178 if (!PageAnon(old_page)) {
2180 dec_mm_counter_fast(mm, 2179 dec_mm_counter_fast(mm,
@@ -2184,7 +2183,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
2184 } else { 2183 } else {
2185 inc_mm_counter_fast(mm, MM_ANONPAGES); 2184 inc_mm_counter_fast(mm, MM_ANONPAGES);
2186 } 2185 }
2187 flush_cache_page(vma, address, pte_pfn(orig_pte)); 2186 flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
2188 entry = mk_pte(new_page, vma->vm_page_prot); 2187 entry = mk_pte(new_page, vma->vm_page_prot);
2189 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2188 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2190 /* 2189 /*
@@ -2193,8 +2192,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
2193 * seen in the presence of one thread doing SMC and another 2192 * seen in the presence of one thread doing SMC and another
2194 * thread doing COW. 2193 * thread doing COW.
2195 */ 2194 */
2196 ptep_clear_flush_notify(vma, address, page_table); 2195 ptep_clear_flush_notify(vma, fe->address, fe->pte);
2197 page_add_new_anon_rmap(new_page, vma, address, false); 2196 page_add_new_anon_rmap(new_page, vma, fe->address, false);
2198 mem_cgroup_commit_charge(new_page, memcg, false, false); 2197 mem_cgroup_commit_charge(new_page, memcg, false, false);
2199 lru_cache_add_active_or_unevictable(new_page, vma); 2198 lru_cache_add_active_or_unevictable(new_page, vma);
2200 /* 2199 /*
@@ -2202,8 +2201,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
2202 * mmu page tables (such as kvm shadow page tables), we want the 2201 * mmu page tables (such as kvm shadow page tables), we want the
2203 * new page to be mapped directly into the secondary page table. 2202 * new page to be mapped directly into the secondary page table.
2204 */ 2203 */
2205 set_pte_at_notify(mm, address, page_table, entry); 2204 set_pte_at_notify(mm, fe->address, fe->pte, entry);
2206 update_mmu_cache(vma, address, page_table); 2205 update_mmu_cache(vma, fe->address, fe->pte);
2207 if (old_page) { 2206 if (old_page) {
2208 /* 2207 /*
2209 * Only after switching the pte to the new page may 2208 * Only after switching the pte to the new page may
@@ -2240,7 +2239,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
2240 if (new_page) 2239 if (new_page)
2241 put_page(new_page); 2240 put_page(new_page);
2242 2241
2243 pte_unmap_unlock(page_table, ptl); 2242 pte_unmap_unlock(fe->pte, fe->ptl);
2244 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 2243 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2245 if (old_page) { 2244 if (old_page) {
2246 /* 2245 /*
@@ -2268,44 +2267,43 @@ oom:
2268 * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED 2267 * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
2269 * mapping 2268 * mapping
2270 */ 2269 */
2271static int wp_pfn_shared(struct mm_struct *mm, 2270static int wp_pfn_shared(struct fault_env *fe, pte_t orig_pte)
2272 struct vm_area_struct *vma, unsigned long address,
2273 pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
2274 pmd_t *pmd)
2275{ 2271{
2272 struct vm_area_struct *vma = fe->vma;
2273
2276 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { 2274 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
2277 struct vm_fault vmf = { 2275 struct vm_fault vmf = {
2278 .page = NULL, 2276 .page = NULL,
2279 .pgoff = linear_page_index(vma, address), 2277 .pgoff = linear_page_index(vma, fe->address),
2280 .virtual_address = (void __user *)(address & PAGE_MASK), 2278 .virtual_address =
2279 (void __user *)(fe->address & PAGE_MASK),
2281 .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, 2280 .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE,
2282 }; 2281 };
2283 int ret; 2282 int ret;
2284 2283
2285 pte_unmap_unlock(page_table, ptl); 2284 pte_unmap_unlock(fe->pte, fe->ptl);
2286 ret = vma->vm_ops->pfn_mkwrite(vma, &vmf); 2285 ret = vma->vm_ops->pfn_mkwrite(vma, &vmf);
2287 if (ret & VM_FAULT_ERROR) 2286 if (ret & VM_FAULT_ERROR)
2288 return ret; 2287 return ret;
2289 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2288 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
2289 &fe->ptl);
2290 /* 2290 /*
2291 * We might have raced with another page fault while we 2291 * We might have raced with another page fault while we
2292 * released the pte_offset_map_lock. 2292 * released the pte_offset_map_lock.
2293 */ 2293 */
2294 if (!pte_same(*page_table, orig_pte)) { 2294 if (!pte_same(*fe->pte, orig_pte)) {
2295 pte_unmap_unlock(page_table, ptl); 2295 pte_unmap_unlock(fe->pte, fe->ptl);
2296 return 0; 2296 return 0;
2297 } 2297 }
2298 } 2298 }
2299 return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte, 2299 return wp_page_reuse(fe, orig_pte, NULL, 0, 0);
2300 NULL, 0, 0);
2301} 2300}
2302 2301
2303static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, 2302static int wp_page_shared(struct fault_env *fe, pte_t orig_pte,
2304 unsigned long address, pte_t *page_table, 2303 struct page *old_page)
2305 pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte, 2304 __releases(fe->ptl)
2306 struct page *old_page)
2307 __releases(ptl)
2308{ 2305{
2306 struct vm_area_struct *vma = fe->vma;
2309 int page_mkwrite = 0; 2307 int page_mkwrite = 0;
2310 2308
2311 get_page(old_page); 2309 get_page(old_page);
@@ -2313,8 +2311,8 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
2313 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { 2311 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
2314 int tmp; 2312 int tmp;
2315 2313
2316 pte_unmap_unlock(page_table, ptl); 2314 pte_unmap_unlock(fe->pte, fe->ptl);
2317 tmp = do_page_mkwrite(vma, old_page, address); 2315 tmp = do_page_mkwrite(vma, old_page, fe->address);
2318 if (unlikely(!tmp || (tmp & 2316 if (unlikely(!tmp || (tmp &
2319 (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { 2317 (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
2320 put_page(old_page); 2318 put_page(old_page);
@@ -2326,19 +2324,18 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
2326 * they did, we just return, as we can count on the 2324 * they did, we just return, as we can count on the
2327 * MMU to tell us if they didn't also make it writable. 2325 * MMU to tell us if they didn't also make it writable.
2328 */ 2326 */
2329 page_table = pte_offset_map_lock(mm, pmd, address, 2327 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
2330 &ptl); 2328 &fe->ptl);
2331 if (!pte_same(*page_table, orig_pte)) { 2329 if (!pte_same(*fe->pte, orig_pte)) {
2332 unlock_page(old_page); 2330 unlock_page(old_page);
2333 pte_unmap_unlock(page_table, ptl); 2331 pte_unmap_unlock(fe->pte, fe->ptl);
2334 put_page(old_page); 2332 put_page(old_page);
2335 return 0; 2333 return 0;
2336 } 2334 }
2337 page_mkwrite = 1; 2335 page_mkwrite = 1;
2338 } 2336 }
2339 2337
2340 return wp_page_reuse(mm, vma, address, page_table, ptl, 2338 return wp_page_reuse(fe, orig_pte, old_page, page_mkwrite, 1);
2341 orig_pte, old_page, page_mkwrite, 1);
2342} 2339}
2343 2340
2344/* 2341/*
@@ -2359,14 +2356,13 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
2359 * but allow concurrent faults), with pte both mapped and locked. 2356 * but allow concurrent faults), with pte both mapped and locked.
2360 * We return with mmap_sem still held, but pte unmapped and unlocked. 2357 * We return with mmap_sem still held, but pte unmapped and unlocked.
2361 */ 2358 */
2362static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, 2359static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
2363 unsigned long address, pte_t *page_table, pmd_t *pmd, 2360 __releases(fe->ptl)
2364 spinlock_t *ptl, pte_t orig_pte)
2365 __releases(ptl)
2366{ 2361{
2362 struct vm_area_struct *vma = fe->vma;
2367 struct page *old_page; 2363 struct page *old_page;
2368 2364
2369 old_page = vm_normal_page(vma, address, orig_pte); 2365 old_page = vm_normal_page(vma, fe->address, orig_pte);
2370 if (!old_page) { 2366 if (!old_page) {
2371 /* 2367 /*
2372 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a 2368 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
@@ -2377,12 +2373,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2377 */ 2373 */
2378 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == 2374 if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
2379 (VM_WRITE|VM_SHARED)) 2375 (VM_WRITE|VM_SHARED))
2380 return wp_pfn_shared(mm, vma, address, page_table, ptl, 2376 return wp_pfn_shared(fe, orig_pte);
2381 orig_pte, pmd);
2382 2377
2383 pte_unmap_unlock(page_table, ptl); 2378 pte_unmap_unlock(fe->pte, fe->ptl);
2384 return wp_page_copy(mm, vma, address, page_table, pmd, 2379 return wp_page_copy(fe, orig_pte, old_page);
2385 orig_pte, old_page);
2386 } 2380 }
2387 2381
2388 /* 2382 /*
@@ -2393,13 +2387,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2393 int total_mapcount; 2387 int total_mapcount;
2394 if (!trylock_page(old_page)) { 2388 if (!trylock_page(old_page)) {
2395 get_page(old_page); 2389 get_page(old_page);
2396 pte_unmap_unlock(page_table, ptl); 2390 pte_unmap_unlock(fe->pte, fe->ptl);
2397 lock_page(old_page); 2391 lock_page(old_page);
2398 page_table = pte_offset_map_lock(mm, pmd, address, 2392 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd,
2399 &ptl); 2393 fe->address, &fe->ptl);
2400 if (!pte_same(*page_table, orig_pte)) { 2394 if (!pte_same(*fe->pte, orig_pte)) {
2401 unlock_page(old_page); 2395 unlock_page(old_page);
2402 pte_unmap_unlock(page_table, ptl); 2396 pte_unmap_unlock(fe->pte, fe->ptl);
2403 put_page(old_page); 2397 put_page(old_page);
2404 return 0; 2398 return 0;
2405 } 2399 }
@@ -2417,14 +2411,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2417 page_move_anon_rmap(old_page, vma); 2411 page_move_anon_rmap(old_page, vma);
2418 } 2412 }
2419 unlock_page(old_page); 2413 unlock_page(old_page);
2420 return wp_page_reuse(mm, vma, address, page_table, ptl, 2414 return wp_page_reuse(fe, orig_pte, old_page, 0, 0);
2421 orig_pte, old_page, 0, 0);
2422 } 2415 }
2423 unlock_page(old_page); 2416 unlock_page(old_page);
2424 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == 2417 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
2425 (VM_WRITE|VM_SHARED))) { 2418 (VM_WRITE|VM_SHARED))) {
2426 return wp_page_shared(mm, vma, address, page_table, pmd, 2419 return wp_page_shared(fe, orig_pte, old_page);
2427 ptl, orig_pte, old_page);
2428 } 2420 }
2429 2421
2430 /* 2422 /*
@@ -2432,9 +2424,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2432 */ 2424 */
2433 get_page(old_page); 2425 get_page(old_page);
2434 2426
2435 pte_unmap_unlock(page_table, ptl); 2427 pte_unmap_unlock(fe->pte, fe->ptl);
2436 return wp_page_copy(mm, vma, address, page_table, pmd, 2428 return wp_page_copy(fe, orig_pte, old_page);
2437 orig_pte, old_page);
2438} 2429}
2439 2430
2440static void unmap_mapping_range_vma(struct vm_area_struct *vma, 2431static void unmap_mapping_range_vma(struct vm_area_struct *vma,
@@ -2522,11 +2513,9 @@ EXPORT_SYMBOL(unmap_mapping_range);
2522 * We return with the mmap_sem locked or unlocked in the same cases 2513 * We return with the mmap_sem locked or unlocked in the same cases
2523 * as does filemap_fault(). 2514 * as does filemap_fault().
2524 */ 2515 */
2525int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, 2516int do_swap_page(struct fault_env *fe, pte_t orig_pte)
2526 unsigned long address, pte_t *page_table, pmd_t *pmd,
2527 unsigned int flags, pte_t orig_pte)
2528{ 2517{
2529 spinlock_t *ptl; 2518 struct vm_area_struct *vma = fe->vma;
2530 struct page *page, *swapcache; 2519 struct page *page, *swapcache;
2531 struct mem_cgroup *memcg; 2520 struct mem_cgroup *memcg;
2532 swp_entry_t entry; 2521 swp_entry_t entry;
@@ -2535,17 +2524,17 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2535 int exclusive = 0; 2524 int exclusive = 0;
2536 int ret = 0; 2525 int ret = 0;
2537 2526
2538 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2527 if (!pte_unmap_same(vma->vm_mm, fe->pmd, fe->pte, orig_pte))
2539 goto out; 2528 goto out;
2540 2529
2541 entry = pte_to_swp_entry(orig_pte); 2530 entry = pte_to_swp_entry(orig_pte);
2542 if (unlikely(non_swap_entry(entry))) { 2531 if (unlikely(non_swap_entry(entry))) {
2543 if (is_migration_entry(entry)) { 2532 if (is_migration_entry(entry)) {
2544 migration_entry_wait(mm, pmd, address); 2533 migration_entry_wait(vma->vm_mm, fe->pmd, fe->address);
2545 } else if (is_hwpoison_entry(entry)) { 2534 } else if (is_hwpoison_entry(entry)) {
2546 ret = VM_FAULT_HWPOISON; 2535 ret = VM_FAULT_HWPOISON;
2547 } else { 2536 } else {
2548 print_bad_pte(vma, address, orig_pte, NULL); 2537 print_bad_pte(vma, fe->address, orig_pte, NULL);
2549 ret = VM_FAULT_SIGBUS; 2538 ret = VM_FAULT_SIGBUS;
2550 } 2539 }
2551 goto out; 2540 goto out;
@@ -2554,14 +2543,15 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2554 page = lookup_swap_cache(entry); 2543 page = lookup_swap_cache(entry);
2555 if (!page) { 2544 if (!page) {
2556 page = swapin_readahead(entry, 2545 page = swapin_readahead(entry,
2557 GFP_HIGHUSER_MOVABLE, vma, address); 2546 GFP_HIGHUSER_MOVABLE, vma, fe->address);
2558 if (!page) { 2547 if (!page) {
2559 /* 2548 /*
2560 * Back out if somebody else faulted in this pte 2549 * Back out if somebody else faulted in this pte
2561 * while we released the pte lock. 2550 * while we released the pte lock.
2562 */ 2551 */
2563 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2552 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd,
2564 if (likely(pte_same(*page_table, orig_pte))) 2553 fe->address, &fe->ptl);
2554 if (likely(pte_same(*fe->pte, orig_pte)))
2565 ret = VM_FAULT_OOM; 2555 ret = VM_FAULT_OOM;
2566 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2556 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2567 goto unlock; 2557 goto unlock;
@@ -2570,7 +2560,7 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2570 /* Had to read the page from swap area: Major fault */ 2560 /* Had to read the page from swap area: Major fault */
2571 ret = VM_FAULT_MAJOR; 2561 ret = VM_FAULT_MAJOR;
2572 count_vm_event(PGMAJFAULT); 2562 count_vm_event(PGMAJFAULT);
2573 mem_cgroup_count_vm_event(mm, PGMAJFAULT); 2563 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
2574 } else if (PageHWPoison(page)) { 2564 } else if (PageHWPoison(page)) {
2575 /* 2565 /*
2576 * hwpoisoned dirty swapcache pages are kept for killing 2566 * hwpoisoned dirty swapcache pages are kept for killing
@@ -2583,7 +2573,7 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2583 } 2573 }
2584 2574
2585 swapcache = page; 2575 swapcache = page;
2586 locked = lock_page_or_retry(page, mm, flags); 2576 locked = lock_page_or_retry(page, vma->vm_mm, fe->flags);
2587 2577
2588 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2578 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2589 if (!locked) { 2579 if (!locked) {
@@ -2600,14 +2590,15 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2600 if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) 2590 if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
2601 goto out_page; 2591 goto out_page;
2602 2592
2603 page = ksm_might_need_to_copy(page, vma, address); 2593 page = ksm_might_need_to_copy(page, vma, fe->address);
2604 if (unlikely(!page)) { 2594 if (unlikely(!page)) {
2605 ret = VM_FAULT_OOM; 2595 ret = VM_FAULT_OOM;
2606 page = swapcache; 2596 page = swapcache;
2607 goto out_page; 2597 goto out_page;
2608 } 2598 }
2609 2599
2610 if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) { 2600 if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
2601 &memcg, false)) {
2611 ret = VM_FAULT_OOM; 2602 ret = VM_FAULT_OOM;
2612 goto out_page; 2603 goto out_page;
2613 } 2604 }
@@ -2615,8 +2606,9 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2615 /* 2606 /*
2616 * Back out if somebody else already faulted in this pte. 2607 * Back out if somebody else already faulted in this pte.
2617 */ 2608 */
2618 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2609 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
2619 if (unlikely(!pte_same(*page_table, orig_pte))) 2610 &fe->ptl);
2611 if (unlikely(!pte_same(*fe->pte, orig_pte)))
2620 goto out_nomap; 2612 goto out_nomap;
2621 2613
2622 if (unlikely(!PageUptodate(page))) { 2614 if (unlikely(!PageUptodate(page))) {
@@ -2634,24 +2626,24 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2634 * must be called after the swap_free(), or it will never succeed. 2626 * must be called after the swap_free(), or it will never succeed.
2635 */ 2627 */
2636 2628
2637 inc_mm_counter_fast(mm, MM_ANONPAGES); 2629 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
2638 dec_mm_counter_fast(mm, MM_SWAPENTS); 2630 dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
2639 pte = mk_pte(page, vma->vm_page_prot); 2631 pte = mk_pte(page, vma->vm_page_prot);
2640 if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { 2632 if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
2641 pte = maybe_mkwrite(pte_mkdirty(pte), vma); 2633 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
2642 flags &= ~FAULT_FLAG_WRITE; 2634 fe->flags &= ~FAULT_FLAG_WRITE;
2643 ret |= VM_FAULT_WRITE; 2635 ret |= VM_FAULT_WRITE;
2644 exclusive = RMAP_EXCLUSIVE; 2636 exclusive = RMAP_EXCLUSIVE;
2645 } 2637 }
2646 flush_icache_page(vma, page); 2638 flush_icache_page(vma, page);
2647 if (pte_swp_soft_dirty(orig_pte)) 2639 if (pte_swp_soft_dirty(orig_pte))
2648 pte = pte_mksoft_dirty(pte); 2640 pte = pte_mksoft_dirty(pte);
2649 set_pte_at(mm, address, page_table, pte); 2641 set_pte_at(vma->vm_mm, fe->address, fe->pte, pte);
2650 if (page == swapcache) { 2642 if (page == swapcache) {
2651 do_page_add_anon_rmap(page, vma, address, exclusive); 2643 do_page_add_anon_rmap(page, vma, fe->address, exclusive);
2652 mem_cgroup_commit_charge(page, memcg, true, false); 2644 mem_cgroup_commit_charge(page, memcg, true, false);
2653 } else { /* ksm created a completely new copy */ 2645 } else { /* ksm created a completely new copy */
2654 page_add_new_anon_rmap(page, vma, address, false); 2646 page_add_new_anon_rmap(page, vma, fe->address, false);
2655 mem_cgroup_commit_charge(page, memcg, false, false); 2647 mem_cgroup_commit_charge(page, memcg, false, false);
2656 lru_cache_add_active_or_unevictable(page, vma); 2648 lru_cache_add_active_or_unevictable(page, vma);
2657 } 2649 }
@@ -2674,22 +2666,22 @@ int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2674 put_page(swapcache); 2666 put_page(swapcache);
2675 } 2667 }
2676 2668
2677 if (flags & FAULT_FLAG_WRITE) { 2669 if (fe->flags & FAULT_FLAG_WRITE) {
2678 ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); 2670 ret |= do_wp_page(fe, pte);
2679 if (ret & VM_FAULT_ERROR) 2671 if (ret & VM_FAULT_ERROR)
2680 ret &= VM_FAULT_ERROR; 2672 ret &= VM_FAULT_ERROR;
2681 goto out; 2673 goto out;
2682 } 2674 }
2683 2675
2684 /* No need to invalidate - it was non-present before */ 2676 /* No need to invalidate - it was non-present before */
2685 update_mmu_cache(vma, address, page_table); 2677 update_mmu_cache(vma, fe->address, fe->pte);
2686unlock: 2678unlock:
2687 pte_unmap_unlock(page_table, ptl); 2679 pte_unmap_unlock(fe->pte, fe->ptl);
2688out: 2680out:
2689 return ret; 2681 return ret;
2690out_nomap: 2682out_nomap:
2691 mem_cgroup_cancel_charge(page, memcg, false); 2683 mem_cgroup_cancel_charge(page, memcg, false);
2692 pte_unmap_unlock(page_table, ptl); 2684 pte_unmap_unlock(fe->pte, fe->ptl);
2693out_page: 2685out_page:
2694 unlock_page(page); 2686 unlock_page(page);
2695out_release: 2687out_release:
@@ -2740,37 +2732,36 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
2740 * but allow concurrent faults), and pte mapped but not yet locked. 2732 * but allow concurrent faults), and pte mapped but not yet locked.
2741 * We return with mmap_sem still held, but pte unmapped and unlocked. 2733 * We return with mmap_sem still held, but pte unmapped and unlocked.
2742 */ 2734 */
2743static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, 2735static int do_anonymous_page(struct fault_env *fe)
2744 unsigned long address, pte_t *page_table, pmd_t *pmd,
2745 unsigned int flags)
2746{ 2736{
2737 struct vm_area_struct *vma = fe->vma;
2747 struct mem_cgroup *memcg; 2738 struct mem_cgroup *memcg;
2748 struct page *page; 2739 struct page *page;
2749 spinlock_t *ptl;
2750 pte_t entry; 2740 pte_t entry;
2751 2741
2752 pte_unmap(page_table); 2742 pte_unmap(fe->pte);
2753 2743
2754 /* File mapping without ->vm_ops ? */ 2744 /* File mapping without ->vm_ops ? */
2755 if (vma->vm_flags & VM_SHARED) 2745 if (vma->vm_flags & VM_SHARED)
2756 return VM_FAULT_SIGBUS; 2746 return VM_FAULT_SIGBUS;
2757 2747
2758 /* Check if we need to add a guard page to the stack */ 2748 /* Check if we need to add a guard page to the stack */
2759 if (check_stack_guard_page(vma, address) < 0) 2749 if (check_stack_guard_page(vma, fe->address) < 0)
2760 return VM_FAULT_SIGSEGV; 2750 return VM_FAULT_SIGSEGV;
2761 2751
2762 /* Use the zero-page for reads */ 2752 /* Use the zero-page for reads */
2763 if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) { 2753 if (!(fe->flags & FAULT_FLAG_WRITE) &&
2764 entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), 2754 !mm_forbids_zeropage(vma->vm_mm)) {
2755 entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address),
2765 vma->vm_page_prot)); 2756 vma->vm_page_prot));
2766 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2757 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
2767 if (!pte_none(*page_table)) 2758 &fe->ptl);
2759 if (!pte_none(*fe->pte))
2768 goto unlock; 2760 goto unlock;
2769 /* Deliver the page fault to userland, check inside PT lock */ 2761 /* Deliver the page fault to userland, check inside PT lock */
2770 if (userfaultfd_missing(vma)) { 2762 if (userfaultfd_missing(vma)) {
2771 pte_unmap_unlock(page_table, ptl); 2763 pte_unmap_unlock(fe->pte, fe->ptl);
2772 return handle_userfault(vma, address, flags, 2764 return handle_userfault(fe, VM_UFFD_MISSING);
2773 VM_UFFD_MISSING);
2774 } 2765 }
2775 goto setpte; 2766 goto setpte;
2776 } 2767 }
@@ -2778,11 +2769,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2778 /* Allocate our own private page. */ 2769 /* Allocate our own private page. */
2779 if (unlikely(anon_vma_prepare(vma))) 2770 if (unlikely(anon_vma_prepare(vma)))
2780 goto oom; 2771 goto oom;
2781 page = alloc_zeroed_user_highpage_movable(vma, address); 2772 page = alloc_zeroed_user_highpage_movable(vma, fe->address);
2782 if (!page) 2773 if (!page)
2783 goto oom; 2774 goto oom;
2784 2775
2785 if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) 2776 if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
2786 goto oom_free_page; 2777 goto oom_free_page;
2787 2778
2788 /* 2779 /*
@@ -2796,30 +2787,30 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2796 if (vma->vm_flags & VM_WRITE) 2787 if (vma->vm_flags & VM_WRITE)
2797 entry = pte_mkwrite(pte_mkdirty(entry)); 2788 entry = pte_mkwrite(pte_mkdirty(entry));
2798 2789
2799 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2790 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
2800 if (!pte_none(*page_table)) 2791 &fe->ptl);
2792 if (!pte_none(*fe->pte))
2801 goto release; 2793 goto release;
2802 2794
2803 /* Deliver the page fault to userland, check inside PT lock */ 2795 /* Deliver the page fault to userland, check inside PT lock */
2804 if (userfaultfd_missing(vma)) { 2796 if (userfaultfd_missing(vma)) {
2805 pte_unmap_unlock(page_table, ptl); 2797 pte_unmap_unlock(fe->pte, fe->ptl);
2806 mem_cgroup_cancel_charge(page, memcg, false); 2798 mem_cgroup_cancel_charge(page, memcg, false);
2807 put_page(page); 2799 put_page(page);
2808 return handle_userfault(vma, address, flags, 2800 return handle_userfault(fe, VM_UFFD_MISSING);
2809 VM_UFFD_MISSING);
2810 } 2801 }
2811 2802
2812 inc_mm_counter_fast(mm, MM_ANONPAGES); 2803 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
2813 page_add_new_anon_rmap(page, vma, address, false); 2804 page_add_new_anon_rmap(page, vma, fe->address, false);
2814 mem_cgroup_commit_charge(page, memcg, false, false); 2805 mem_cgroup_commit_charge(page, memcg, false, false);
2815 lru_cache_add_active_or_unevictable(page, vma); 2806 lru_cache_add_active_or_unevictable(page, vma);
2816setpte: 2807setpte:
2817 set_pte_at(mm, address, page_table, entry); 2808 set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
2818 2809
2819 /* No need to invalidate - it was non-present before */ 2810 /* No need to invalidate - it was non-present before */
2820 update_mmu_cache(vma, address, page_table); 2811 update_mmu_cache(vma, fe->address, fe->pte);
2821unlock: 2812unlock:
2822 pte_unmap_unlock(page_table, ptl); 2813 pte_unmap_unlock(fe->pte, fe->ptl);
2823 return 0; 2814 return 0;
2824release: 2815release:
2825 mem_cgroup_cancel_charge(page, memcg, false); 2816 mem_cgroup_cancel_charge(page, memcg, false);
@@ -2836,17 +2827,16 @@ oom:
2836 * released depending on flags and vma->vm_ops->fault() return value. 2827 * released depending on flags and vma->vm_ops->fault() return value.
2837 * See filemap_fault() and __lock_page_retry(). 2828 * See filemap_fault() and __lock_page_retry().
2838 */ 2829 */
2839static int __do_fault(struct vm_area_struct *vma, unsigned long address, 2830static int __do_fault(struct fault_env *fe, pgoff_t pgoff,
2840 pgoff_t pgoff, unsigned int flags, 2831 struct page *cow_page, struct page **page, void **entry)
2841 struct page *cow_page, struct page **page,
2842 void **entry)
2843{ 2832{
2833 struct vm_area_struct *vma = fe->vma;
2844 struct vm_fault vmf; 2834 struct vm_fault vmf;
2845 int ret; 2835 int ret;
2846 2836
2847 vmf.virtual_address = (void __user *)(address & PAGE_MASK); 2837 vmf.virtual_address = (void __user *)(fe->address & PAGE_MASK);
2848 vmf.pgoff = pgoff; 2838 vmf.pgoff = pgoff;
2849 vmf.flags = flags; 2839 vmf.flags = fe->flags;
2850 vmf.page = NULL; 2840 vmf.page = NULL;
2851 vmf.gfp_mask = __get_fault_gfp_mask(vma); 2841 vmf.gfp_mask = __get_fault_gfp_mask(vma);
2852 vmf.cow_page = cow_page; 2842 vmf.cow_page = cow_page;
@@ -2878,38 +2868,36 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
2878/** 2868/**
2879 * do_set_pte - setup new PTE entry for given page and add reverse page mapping. 2869 * do_set_pte - setup new PTE entry for given page and add reverse page mapping.
2880 * 2870 *
2881 * @vma: virtual memory area 2871 * @fe: fault environment
2882 * @address: user virtual address
2883 * @page: page to map 2872 * @page: page to map
2884 * @pte: pointer to target page table entry
2885 * @write: true, if new entry is writable
2886 * @anon: true, if it's anonymous page
2887 * 2873 *
2888 * Caller must hold page table lock relevant for @pte. 2874 * Caller must hold page table lock relevant for @fe->pte.
2889 * 2875 *
2890 * Target users are page handler itself and implementations of 2876 * Target users are page handler itself and implementations of
2891 * vm_ops->map_pages. 2877 * vm_ops->map_pages.
2892 */ 2878 */
2893void do_set_pte(struct vm_area_struct *vma, unsigned long address, 2879void do_set_pte(struct fault_env *fe, struct page *page)
2894 struct page *page, pte_t *pte, bool write, bool anon)
2895{ 2880{
2881 struct vm_area_struct *vma = fe->vma;
2882 bool write = fe->flags & FAULT_FLAG_WRITE;
2896 pte_t entry; 2883 pte_t entry;
2897 2884
2898 flush_icache_page(vma, page); 2885 flush_icache_page(vma, page);
2899 entry = mk_pte(page, vma->vm_page_prot); 2886 entry = mk_pte(page, vma->vm_page_prot);
2900 if (write) 2887 if (write)
2901 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2888 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2902 if (anon) { 2889 /* copy-on-write page */
2890 if (write && !(vma->vm_flags & VM_SHARED)) {
2903 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); 2891 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
2904 page_add_new_anon_rmap(page, vma, address, false); 2892 page_add_new_anon_rmap(page, vma, fe->address, false);
2905 } else { 2893 } else {
2906 inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); 2894 inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
2907 page_add_file_rmap(page); 2895 page_add_file_rmap(page);
2908 } 2896 }
2909 set_pte_at(vma->vm_mm, address, pte, entry); 2897 set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
2910 2898
2911 /* no need to invalidate: a not-present page won't be cached */ 2899 /* no need to invalidate: a not-present page won't be cached */
2912 update_mmu_cache(vma, address, pte); 2900 update_mmu_cache(vma, fe->address, fe->pte);
2913} 2901}
2914 2902
2915static unsigned long fault_around_bytes __read_mostly = 2903static unsigned long fault_around_bytes __read_mostly =
@@ -2976,57 +2964,53 @@ late_initcall(fault_around_debugfs);
2976 * fault_around_pages() value (and therefore to page order). This way it's 2964 * fault_around_pages() value (and therefore to page order). This way it's
2977 * easier to guarantee that we don't cross page table boundaries. 2965 * easier to guarantee that we don't cross page table boundaries.
2978 */ 2966 */
2979static void do_fault_around(struct vm_area_struct *vma, unsigned long address, 2967static void do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
2980 pte_t *pte, pgoff_t pgoff, unsigned int flags)
2981{ 2968{
2982 unsigned long start_addr, nr_pages, mask; 2969 unsigned long address = fe->address, start_addr, nr_pages, mask;
2983 pgoff_t max_pgoff; 2970 pte_t *pte = fe->pte;
2984 struct vm_fault vmf; 2971 pgoff_t end_pgoff;
2985 int off; 2972 int off;
2986 2973
2987 nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; 2974 nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
2988 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; 2975 mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
2989 2976
2990 start_addr = max(address & mask, vma->vm_start); 2977 start_addr = max(fe->address & mask, fe->vma->vm_start);
2991 off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); 2978 off = ((fe->address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
2992 pte -= off; 2979 fe->pte -= off;
2993 pgoff -= off; 2980 start_pgoff -= off;
2994 2981
2995 /* 2982 /*
2996 * max_pgoff is either end of page table or end of vma 2983 * end_pgoff is either end of page table or end of vma
2997 * or fault_around_pages() from pgoff, depending what is nearest. 2984 * or fault_around_pages() from start_pgoff, depending what is nearest.
2998 */ 2985 */
2999 max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + 2986 end_pgoff = start_pgoff -
2987 ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
3000 PTRS_PER_PTE - 1; 2988 PTRS_PER_PTE - 1;
3001 max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, 2989 end_pgoff = min3(end_pgoff, vma_pages(fe->vma) + fe->vma->vm_pgoff - 1,
3002 pgoff + nr_pages - 1); 2990 start_pgoff + nr_pages - 1);
3003 2991
3004 /* Check if it makes any sense to call ->map_pages */ 2992 /* Check if it makes any sense to call ->map_pages */
3005 while (!pte_none(*pte)) { 2993 fe->address = start_addr;
3006 if (++pgoff > max_pgoff) 2994 while (!pte_none(*fe->pte)) {
3007 return; 2995 if (++start_pgoff > end_pgoff)
3008 start_addr += PAGE_SIZE; 2996 goto out;
3009 if (start_addr >= vma->vm_end) 2997 fe->address += PAGE_SIZE;
3010 return; 2998 if (fe->address >= fe->vma->vm_end)
3011 pte++; 2999 goto out;
3000 fe->pte++;
3012 } 3001 }
3013 3002
3014 vmf.virtual_address = (void __user *) start_addr; 3003 fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
3015 vmf.pte = pte; 3004out:
3016 vmf.pgoff = pgoff; 3005 /* restore fault_env */
3017 vmf.max_pgoff = max_pgoff; 3006 fe->pte = pte;
3018 vmf.flags = flags; 3007 fe->address = address;
3019 vmf.gfp_mask = __get_fault_gfp_mask(vma);
3020 vma->vm_ops->map_pages(vma, &vmf);
3021} 3008}
3022 3009
3023static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3010static int do_read_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte)
3024 unsigned long address, pmd_t *pmd,
3025 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
3026{ 3011{
3012 struct vm_area_struct *vma = fe->vma;
3027 struct page *fault_page; 3013 struct page *fault_page;
3028 spinlock_t *ptl;
3029 pte_t *pte;
3030 int ret = 0; 3014 int ret = 0;
3031 3015
3032 /* 3016 /*
@@ -3035,66 +3019,68 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3035 * something). 3019 * something).
3036 */ 3020 */
3037 if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { 3021 if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
3038 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 3022 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
3039 do_fault_around(vma, address, pte, pgoff, flags); 3023 &fe->ptl);
3040 if (!pte_same(*pte, orig_pte)) 3024 if (!pte_same(*fe->pte, orig_pte))
3025 goto unlock_out;
3026 do_fault_around(fe, pgoff);
3027 /* Check if the fault is handled by faultaround */
3028 if (!pte_same(*fe->pte, orig_pte))
3041 goto unlock_out; 3029 goto unlock_out;
3042 pte_unmap_unlock(pte, ptl); 3030 pte_unmap_unlock(fe->pte, fe->ptl);
3043 } 3031 }
3044 3032
3045 ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL); 3033 ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL);
3046 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 3034 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
3047 return ret; 3035 return ret;
3048 3036
3049 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 3037 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, &fe->ptl);
3050 if (unlikely(!pte_same(*pte, orig_pte))) { 3038 if (unlikely(!pte_same(*fe->pte, orig_pte))) {
3051 pte_unmap_unlock(pte, ptl); 3039 pte_unmap_unlock(fe->pte, fe->ptl);
3052 unlock_page(fault_page); 3040 unlock_page(fault_page);
3053 put_page(fault_page); 3041 put_page(fault_page);
3054 return ret; 3042 return ret;
3055 } 3043 }
3056 do_set_pte(vma, address, fault_page, pte, false, false); 3044 do_set_pte(fe, fault_page);
3057 unlock_page(fault_page); 3045 unlock_page(fault_page);
3058unlock_out: 3046unlock_out:
3059 pte_unmap_unlock(pte, ptl); 3047 pte_unmap_unlock(fe->pte, fe->ptl);
3060 return ret; 3048 return ret;
3061} 3049}
3062 3050
3063static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3051static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte)
3064 unsigned long address, pmd_t *pmd,
3065 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
3066{ 3052{
3053 struct vm_area_struct *vma = fe->vma;
3067 struct page *fault_page, *new_page; 3054 struct page *fault_page, *new_page;
3068 void *fault_entry; 3055 void *fault_entry;
3069 struct mem_cgroup *memcg; 3056 struct mem_cgroup *memcg;
3070 spinlock_t *ptl;
3071 pte_t *pte;
3072 int ret; 3057 int ret;
3073 3058
3074 if (unlikely(anon_vma_prepare(vma))) 3059 if (unlikely(anon_vma_prepare(vma)))
3075 return VM_FAULT_OOM; 3060 return VM_FAULT_OOM;
3076 3061
3077 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 3062 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, fe->address);
3078 if (!new_page) 3063 if (!new_page)
3079 return VM_FAULT_OOM; 3064 return VM_FAULT_OOM;
3080 3065
3081 if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) { 3066 if (mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
3067 &memcg, false)) {
3082 put_page(new_page); 3068 put_page(new_page);
3083 return VM_FAULT_OOM; 3069 return VM_FAULT_OOM;
3084 } 3070 }
3085 3071
3086 ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page, 3072 ret = __do_fault(fe, pgoff, new_page, &fault_page, &fault_entry);
3087 &fault_entry);
3088 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 3073 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
3089 goto uncharge_out; 3074 goto uncharge_out;
3090 3075
3091 if (!(ret & VM_FAULT_DAX_LOCKED)) 3076 if (!(ret & VM_FAULT_DAX_LOCKED))
3092 copy_user_highpage(new_page, fault_page, address, vma); 3077 copy_user_highpage(new_page, fault_page, fe->address, vma);
3093 __SetPageUptodate(new_page); 3078 __SetPageUptodate(new_page);
3094 3079
3095 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 3080 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
3096 if (unlikely(!pte_same(*pte, orig_pte))) { 3081 &fe->ptl);
3097 pte_unmap_unlock(pte, ptl); 3082 if (unlikely(!pte_same(*fe->pte, orig_pte))) {
3083 pte_unmap_unlock(fe->pte, fe->ptl);
3098 if (!(ret & VM_FAULT_DAX_LOCKED)) { 3084 if (!(ret & VM_FAULT_DAX_LOCKED)) {
3099 unlock_page(fault_page); 3085 unlock_page(fault_page);
3100 put_page(fault_page); 3086 put_page(fault_page);
@@ -3104,10 +3090,10 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3104 } 3090 }
3105 goto uncharge_out; 3091 goto uncharge_out;
3106 } 3092 }
3107 do_set_pte(vma, address, new_page, pte, true, true); 3093 do_set_pte(fe, new_page);
3108 mem_cgroup_commit_charge(new_page, memcg, false, false); 3094 mem_cgroup_commit_charge(new_page, memcg, false, false);
3109 lru_cache_add_active_or_unevictable(new_page, vma); 3095 lru_cache_add_active_or_unevictable(new_page, vma);
3110 pte_unmap_unlock(pte, ptl); 3096 pte_unmap_unlock(fe->pte, fe->ptl);
3111 if (!(ret & VM_FAULT_DAX_LOCKED)) { 3097 if (!(ret & VM_FAULT_DAX_LOCKED)) {
3112 unlock_page(fault_page); 3098 unlock_page(fault_page);
3113 put_page(fault_page); 3099 put_page(fault_page);
@@ -3121,18 +3107,15 @@ uncharge_out:
3121 return ret; 3107 return ret;
3122} 3108}
3123 3109
3124static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3110static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff, pte_t orig_pte)
3125 unsigned long address, pmd_t *pmd,
3126 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
3127{ 3111{
3112 struct vm_area_struct *vma = fe->vma;
3128 struct page *fault_page; 3113 struct page *fault_page;
3129 struct address_space *mapping; 3114 struct address_space *mapping;
3130 spinlock_t *ptl;
3131 pte_t *pte;
3132 int dirtied = 0; 3115 int dirtied = 0;
3133 int ret, tmp; 3116 int ret, tmp;
3134 3117
3135 ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page, NULL); 3118 ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL);
3136 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) 3119 if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
3137 return ret; 3120 return ret;
3138 3121
@@ -3142,7 +3125,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3142 */ 3125 */
3143 if (vma->vm_ops->page_mkwrite) { 3126 if (vma->vm_ops->page_mkwrite) {
3144 unlock_page(fault_page); 3127 unlock_page(fault_page);
3145 tmp = do_page_mkwrite(vma, fault_page, address); 3128 tmp = do_page_mkwrite(vma, fault_page, fe->address);
3146 if (unlikely(!tmp || 3129 if (unlikely(!tmp ||
3147 (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { 3130 (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
3148 put_page(fault_page); 3131 put_page(fault_page);
@@ -3150,15 +3133,16 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3150 } 3133 }
3151 } 3134 }
3152 3135
3153 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 3136 fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
3154 if (unlikely(!pte_same(*pte, orig_pte))) { 3137 &fe->ptl);
3155 pte_unmap_unlock(pte, ptl); 3138 if (unlikely(!pte_same(*fe->pte, orig_pte))) {
3139 pte_unmap_unlock(fe->pte, fe->ptl);
3156 unlock_page(fault_page); 3140 unlock_page(fault_page);
3157 put_page(fault_page); 3141 put_page(fault_page);
3158 return ret; 3142 return ret;
3159 } 3143 }
3160 do_set_pte(vma, address, fault_page, pte, true, false); 3144 do_set_pte(fe, fault_page);
3161 pte_unmap_unlock(pte, ptl); 3145 pte_unmap_unlock(fe->pte, fe->ptl);
3162 3146
3163 if (set_page_dirty(fault_page)) 3147 if (set_page_dirty(fault_page))
3164 dirtied = 1; 3148 dirtied = 1;
@@ -3190,23 +3174,20 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3190 * The mmap_sem may have been released depending on flags and our 3174 * The mmap_sem may have been released depending on flags and our
3191 * return value. See filemap_fault() and __lock_page_or_retry(). 3175 * return value. See filemap_fault() and __lock_page_or_retry().
3192 */ 3176 */
3193static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, 3177static int do_fault(struct fault_env *fe, pte_t orig_pte)
3194 unsigned long address, pte_t *page_table, pmd_t *pmd,
3195 unsigned int flags, pte_t orig_pte)
3196{ 3178{
3197 pgoff_t pgoff = linear_page_index(vma, address); 3179 struct vm_area_struct *vma = fe->vma;
3180 pgoff_t pgoff = linear_page_index(vma, fe->address);
3198 3181
3199 pte_unmap(page_table); 3182 pte_unmap(fe->pte);
3200 /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ 3183 /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
3201 if (!vma->vm_ops->fault) 3184 if (!vma->vm_ops->fault)
3202 return VM_FAULT_SIGBUS; 3185 return VM_FAULT_SIGBUS;
3203 if (!(flags & FAULT_FLAG_WRITE)) 3186 if (!(fe->flags & FAULT_FLAG_WRITE))
3204 return do_read_fault(mm, vma, address, pmd, pgoff, flags, 3187 return do_read_fault(fe, pgoff, orig_pte);
3205 orig_pte);
3206 if (!(vma->vm_flags & VM_SHARED)) 3188 if (!(vma->vm_flags & VM_SHARED))
3207 return do_cow_fault(mm, vma, address, pmd, pgoff, flags, 3189 return do_cow_fault(fe, pgoff, orig_pte);
3208 orig_pte); 3190 return do_shared_fault(fe, pgoff, orig_pte);
3209 return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
3210} 3191}
3211 3192
3212static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, 3193static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
@@ -3224,11 +3205,10 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
3224 return mpol_misplaced(page, vma, addr); 3205 return mpol_misplaced(page, vma, addr);
3225} 3206}
3226 3207
3227static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, 3208static int do_numa_page(struct fault_env *fe, pte_t pte)
3228 unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
3229{ 3209{
3210 struct vm_area_struct *vma = fe->vma;
3230 struct page *page = NULL; 3211 struct page *page = NULL;
3231 spinlock_t *ptl;
3232 int page_nid = -1; 3212 int page_nid = -1;
3233 int last_cpupid; 3213 int last_cpupid;
3234 int target_nid; 3214 int target_nid;
@@ -3248,10 +3228,10 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3248 * page table entry is not accessible, so there would be no 3228 * page table entry is not accessible, so there would be no
3249 * concurrent hardware modifications to the PTE. 3229 * concurrent hardware modifications to the PTE.
3250 */ 3230 */
3251 ptl = pte_lockptr(mm, pmd); 3231 fe->ptl = pte_lockptr(vma->vm_mm, fe->pmd);
3252 spin_lock(ptl); 3232 spin_lock(fe->ptl);
3253 if (unlikely(!pte_same(*ptep, pte))) { 3233 if (unlikely(!pte_same(*fe->pte, pte))) {
3254 pte_unmap_unlock(ptep, ptl); 3234 pte_unmap_unlock(fe->pte, fe->ptl);
3255 goto out; 3235 goto out;
3256 } 3236 }
3257 3237
@@ -3260,18 +3240,18 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3260 pte = pte_mkyoung(pte); 3240 pte = pte_mkyoung(pte);
3261 if (was_writable) 3241 if (was_writable)
3262 pte = pte_mkwrite(pte); 3242 pte = pte_mkwrite(pte);
3263 set_pte_at(mm, addr, ptep, pte); 3243 set_pte_at(vma->vm_mm, fe->address, fe->pte, pte);
3264 update_mmu_cache(vma, addr, ptep); 3244 update_mmu_cache(vma, fe->address, fe->pte);
3265 3245
3266 page = vm_normal_page(vma, addr, pte); 3246 page = vm_normal_page(vma, fe->address, pte);
3267 if (!page) { 3247 if (!page) {
3268 pte_unmap_unlock(ptep, ptl); 3248 pte_unmap_unlock(fe->pte, fe->ptl);
3269 return 0; 3249 return 0;
3270 } 3250 }
3271 3251
3272 /* TODO: handle PTE-mapped THP */ 3252 /* TODO: handle PTE-mapped THP */
3273 if (PageCompound(page)) { 3253 if (PageCompound(page)) {
3274 pte_unmap_unlock(ptep, ptl); 3254 pte_unmap_unlock(fe->pte, fe->ptl);
3275 return 0; 3255 return 0;
3276 } 3256 }
3277 3257
@@ -3295,8 +3275,9 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3295 3275
3296 last_cpupid = page_cpupid_last(page); 3276 last_cpupid = page_cpupid_last(page);
3297 page_nid = page_to_nid(page); 3277 page_nid = page_to_nid(page);
3298 target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags); 3278 target_nid = numa_migrate_prep(page, vma, fe->address, page_nid,
3299 pte_unmap_unlock(ptep, ptl); 3279 &flags);
3280 pte_unmap_unlock(fe->pte, fe->ptl);
3300 if (target_nid == -1) { 3281 if (target_nid == -1) {
3301 put_page(page); 3282 put_page(page);
3302 goto out; 3283 goto out;
@@ -3316,24 +3297,24 @@ out:
3316 return 0; 3297 return 0;
3317} 3298}
3318 3299
3319static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, 3300static int create_huge_pmd(struct fault_env *fe)
3320 unsigned long address, pmd_t *pmd, unsigned int flags)
3321{ 3301{
3302 struct vm_area_struct *vma = fe->vma;
3322 if (vma_is_anonymous(vma)) 3303 if (vma_is_anonymous(vma))
3323 return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags); 3304 return do_huge_pmd_anonymous_page(fe);
3324 if (vma->vm_ops->pmd_fault) 3305 if (vma->vm_ops->pmd_fault)
3325 return vma->vm_ops->pmd_fault(vma, address, pmd, flags); 3306 return vma->vm_ops->pmd_fault(vma, fe->address, fe->pmd,
3307 fe->flags);
3326 return VM_FAULT_FALLBACK; 3308 return VM_FAULT_FALLBACK;
3327} 3309}
3328 3310
3329static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, 3311static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
3330 unsigned long address, pmd_t *pmd, pmd_t orig_pmd,
3331 unsigned int flags)
3332{ 3312{
3333 if (vma_is_anonymous(vma)) 3313 if (vma_is_anonymous(fe->vma))
3334 return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd); 3314 return do_huge_pmd_wp_page(fe, orig_pmd);
3335 if (vma->vm_ops->pmd_fault) 3315 if (fe->vma->vm_ops->pmd_fault)
3336 return vma->vm_ops->pmd_fault(vma, address, pmd, flags); 3316 return fe->vma->vm_ops->pmd_fault(fe->vma, fe->address, fe->pmd,
3317 fe->flags);
3337 return VM_FAULT_FALLBACK; 3318 return VM_FAULT_FALLBACK;
3338} 3319}
3339 3320
@@ -3353,12 +3334,9 @@ static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
3353 * The mmap_sem may have been released depending on flags and our 3334 * The mmap_sem may have been released depending on flags and our
3354 * return value. See filemap_fault() and __lock_page_or_retry(). 3335 * return value. See filemap_fault() and __lock_page_or_retry().
3355 */ 3336 */
3356static int handle_pte_fault(struct mm_struct *mm, 3337static int handle_pte_fault(struct fault_env *fe)
3357 struct vm_area_struct *vma, unsigned long address,
3358 pte_t *pte, pmd_t *pmd, unsigned int flags)
3359{ 3338{
3360 pte_t entry; 3339 pte_t entry;
3361 spinlock_t *ptl;
3362 3340
3363 /* 3341 /*
3364 * some architectures can have larger ptes than wordsize, 3342 * some architectures can have larger ptes than wordsize,
@@ -3368,37 +3346,34 @@ static int handle_pte_fault(struct mm_struct *mm,
3368 * we later double check anyway with the ptl lock held. So here 3346 * we later double check anyway with the ptl lock held. So here
3369 * a barrier will do. 3347 * a barrier will do.
3370 */ 3348 */
3371 entry = *pte; 3349 entry = *fe->pte;
3372 barrier(); 3350 barrier();
3373 if (!pte_present(entry)) { 3351 if (!pte_present(entry)) {
3374 if (pte_none(entry)) { 3352 if (pte_none(entry)) {
3375 if (vma_is_anonymous(vma)) 3353 if (vma_is_anonymous(fe->vma))
3376 return do_anonymous_page(mm, vma, address, 3354 return do_anonymous_page(fe);
3377 pte, pmd, flags);
3378 else 3355 else
3379 return do_fault(mm, vma, address, pte, pmd, 3356 return do_fault(fe, entry);
3380 flags, entry);
3381 } 3357 }
3382 return do_swap_page(mm, vma, address, 3358 return do_swap_page(fe, entry);
3383 pte, pmd, flags, entry);
3384 } 3359 }
3385 3360
3386 if (pte_protnone(entry)) 3361 if (pte_protnone(entry))
3387 return do_numa_page(mm, vma, address, entry, pte, pmd); 3362 return do_numa_page(fe, entry);
3388 3363
3389 ptl = pte_lockptr(mm, pmd); 3364 fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
3390 spin_lock(ptl); 3365 spin_lock(fe->ptl);
3391 if (unlikely(!pte_same(*pte, entry))) 3366 if (unlikely(!pte_same(*fe->pte, entry)))
3392 goto unlock; 3367 goto unlock;
3393 if (flags & FAULT_FLAG_WRITE) { 3368 if (fe->flags & FAULT_FLAG_WRITE) {
3394 if (!pte_write(entry)) 3369 if (!pte_write(entry))
3395 return do_wp_page(mm, vma, address, 3370 return do_wp_page(fe, entry);
3396 pte, pmd, ptl, entry);
3397 entry = pte_mkdirty(entry); 3371 entry = pte_mkdirty(entry);
3398 } 3372 }
3399 entry = pte_mkyoung(entry); 3373 entry = pte_mkyoung(entry);
3400 if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) { 3374 if (ptep_set_access_flags(fe->vma, fe->address, fe->pte, entry,
3401 update_mmu_cache(vma, address, pte); 3375 fe->flags & FAULT_FLAG_WRITE)) {
3376 update_mmu_cache(fe->vma, fe->address, fe->pte);
3402 } else { 3377 } else {
3403 /* 3378 /*
3404 * This is needed only for protection faults but the arch code 3379 * This is needed only for protection faults but the arch code
@@ -3406,11 +3381,11 @@ static int handle_pte_fault(struct mm_struct *mm,
3406 * This still avoids useless tlb flushes for .text page faults 3381 * This still avoids useless tlb flushes for .text page faults
3407 * with threads. 3382 * with threads.
3408 */ 3383 */
3409 if (flags & FAULT_FLAG_WRITE) 3384 if (fe->flags & FAULT_FLAG_WRITE)
3410 flush_tlb_fix_spurious_fault(vma, address); 3385 flush_tlb_fix_spurious_fault(fe->vma, fe->address);
3411 } 3386 }
3412unlock: 3387unlock:
3413 pte_unmap_unlock(pte, ptl); 3388 pte_unmap_unlock(fe->pte, fe->ptl);
3414 return 0; 3389 return 0;
3415} 3390}
3416 3391
@@ -3423,51 +3398,42 @@ unlock:
3423static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, 3398static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
3424 unsigned int flags) 3399 unsigned int flags)
3425{ 3400{
3401 struct fault_env fe = {
3402 .vma = vma,
3403 .address = address,
3404 .flags = flags,
3405 };
3426 struct mm_struct *mm = vma->vm_mm; 3406 struct mm_struct *mm = vma->vm_mm;
3427 pgd_t *pgd; 3407 pgd_t *pgd;
3428 pud_t *pud; 3408 pud_t *pud;
3429 pmd_t *pmd;
3430 pte_t *pte;
3431
3432 if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
3433 flags & FAULT_FLAG_INSTRUCTION,
3434 flags & FAULT_FLAG_REMOTE))
3435 return VM_FAULT_SIGSEGV;
3436
3437 if (unlikely(is_vm_hugetlb_page(vma)))
3438 return hugetlb_fault(mm, vma, address, flags);
3439 3409
3440 pgd = pgd_offset(mm, address); 3410 pgd = pgd_offset(mm, address);
3441 pud = pud_alloc(mm, pgd, address); 3411 pud = pud_alloc(mm, pgd, address);
3442 if (!pud) 3412 if (!pud)
3443 return VM_FAULT_OOM; 3413 return VM_FAULT_OOM;
3444 pmd = pmd_alloc(mm, pud, address); 3414 fe.pmd = pmd_alloc(mm, pud, address);
3445 if (!pmd) 3415 if (!fe.pmd)
3446 return VM_FAULT_OOM; 3416 return VM_FAULT_OOM;
3447 if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) { 3417 if (pmd_none(*fe.pmd) && transparent_hugepage_enabled(vma)) {
3448 int ret = create_huge_pmd(mm, vma, address, pmd, flags); 3418 int ret = create_huge_pmd(&fe);
3449 if (!(ret & VM_FAULT_FALLBACK)) 3419 if (!(ret & VM_FAULT_FALLBACK))
3450 return ret; 3420 return ret;
3451 } else { 3421 } else {
3452 pmd_t orig_pmd = *pmd; 3422 pmd_t orig_pmd = *fe.pmd;
3453 int ret; 3423 int ret;
3454 3424
3455 barrier(); 3425 barrier();
3456 if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { 3426 if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
3457 unsigned int dirty = flags & FAULT_FLAG_WRITE;
3458
3459 if (pmd_protnone(orig_pmd)) 3427 if (pmd_protnone(orig_pmd))
3460 return do_huge_pmd_numa_page(mm, vma, address, 3428 return do_huge_pmd_numa_page(&fe, orig_pmd);
3461 orig_pmd, pmd);
3462 3429
3463 if (dirty && !pmd_write(orig_pmd)) { 3430 if ((fe.flags & FAULT_FLAG_WRITE) &&
3464 ret = wp_huge_pmd(mm, vma, address, pmd, 3431 !pmd_write(orig_pmd)) {
3465 orig_pmd, flags); 3432 ret = wp_huge_pmd(&fe, orig_pmd);
3466 if (!(ret & VM_FAULT_FALLBACK)) 3433 if (!(ret & VM_FAULT_FALLBACK))
3467 return ret; 3434 return ret;
3468 } else { 3435 } else {
3469 huge_pmd_set_accessed(mm, vma, address, pmd, 3436 huge_pmd_set_accessed(&fe, orig_pmd);
3470 orig_pmd, dirty);
3471 return 0; 3437 return 0;
3472 } 3438 }
3473 } 3439 }
@@ -3478,7 +3444,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
3478 * run pte_offset_map on the pmd, if an huge pmd could 3444 * run pte_offset_map on the pmd, if an huge pmd could
3479 * materialize from under us from a different thread. 3445 * materialize from under us from a different thread.
3480 */ 3446 */
3481 if (unlikely(pte_alloc(mm, pmd, address))) 3447 if (unlikely(pte_alloc(fe.vma->vm_mm, fe.pmd, fe.address)))
3482 return VM_FAULT_OOM; 3448 return VM_FAULT_OOM;
3483 /* 3449 /*
3484 * If a huge pmd materialized under us just retry later. Use 3450 * If a huge pmd materialized under us just retry later. Use
@@ -3491,7 +3457,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
3491 * through an atomic read in C, which is what pmd_trans_unstable() 3457 * through an atomic read in C, which is what pmd_trans_unstable()
3492 * provides. 3458 * provides.
3493 */ 3459 */
3494 if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd))) 3460 if (unlikely(pmd_trans_unstable(fe.pmd) || pmd_devmap(*fe.pmd)))
3495 return 0; 3461 return 0;
3496 /* 3462 /*
3497 * A regular pmd is established and it can't morph into a huge pmd 3463 * A regular pmd is established and it can't morph into a huge pmd
@@ -3499,9 +3465,9 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
3499 * read mode and khugepaged takes it in write mode. So now it's 3465 * read mode and khugepaged takes it in write mode. So now it's
3500 * safe to run pte_offset_map(). 3466 * safe to run pte_offset_map().
3501 */ 3467 */
3502 pte = pte_offset_map(pmd, address); 3468 fe.pte = pte_offset_map(fe.pmd, fe.address);
3503 3469
3504 return handle_pte_fault(mm, vma, address, pte, pmd, flags); 3470 return handle_pte_fault(&fe);
3505} 3471}
3506 3472
3507/* 3473/*
@@ -3530,7 +3496,15 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
3530 if (flags & FAULT_FLAG_USER) 3496 if (flags & FAULT_FLAG_USER)
3531 mem_cgroup_oom_enable(); 3497 mem_cgroup_oom_enable();
3532 3498
3533 ret = __handle_mm_fault(vma, address, flags); 3499 if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
3500 flags & FAULT_FLAG_INSTRUCTION,
3501 flags & FAULT_FLAG_REMOTE))
3502 return VM_FAULT_SIGSEGV;
3503
3504 if (unlikely(is_vm_hugetlb_page(vma)))
3505 ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
3506 else
3507 ret = __handle_mm_fault(vma, address, flags);
3534 3508
3535 if (flags & FAULT_FLAG_USER) { 3509 if (flags & FAULT_FLAG_USER) {
3536 mem_cgroup_oom_disable(); 3510 mem_cgroup_oom_disable();