author		Hugh Dickins <hugh.dickins@tiscali.co.uk>	2009-09-21 20:03:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-22 10:17:40 -0400
commit		2a15efc953b26ad57d7d38b9e6782d57e53b4ab2 (patch)
tree		f4d04903b3303e80460d2fa3f38da2b7eea82d22
parent		8e4b9a60718970bbc02dfd3abd0b956ab65af231 (diff)

mm: follow_hugetlb_page flags

follow_hugetlb_page() shouldn't be guessing about the coredump case
either: pass the foll_flags down to it, instead of just the write bit.

Remove that obscure huge_zeropage_ok() test. The decision is easy,
though unlike the non-huge case: here vm_ops->fault is always set.
But we know that a fault would serve up zeroes, unless there's
already a hugetlbfs pagecache page to back the range.
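
To make that rule concrete, here is a minimal standalone sketch of the
dump decision (illustrative only, not the kernel code: huge_probe and
dump_decision() are hypothetical stand-ins for the pte lookup plus the
new hugetlbfs_backed() check in the patch below):

	#include <errno.h>
	#include <stdbool.h>

	struct huge_probe {
		bool pte_populated;	/* huge pte already faulted in? */
		bool pagecache_backed;	/* hugetlbfs pagecache page exists? */
	};

	/*
	 * With FOLL_DUMP set, a hole with no pagecache behind it is
	 * reported as an error, so the dumper can emit zeroes for the
	 * range without faulting hugepages in; anything already present
	 * or backed by pagecache gets dumped for real.
	 */
	static int dump_decision(const struct huge_probe *p)
	{
		if (!p->pte_populated && !p->pagecache_backed)
			return -EFAULT;	/* hole: dump reads as zeroes */
		return 0;		/* fault in / use the backing page */
	}
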
(Alternatively, since hugetlb pages aren't swapped out under pressure,
you could save more dump space by arguing that a page not yet faulted
into this process cannot be relevant to the dump; but that would be
more surprising.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

-rw-r--r--  include/linux/hugetlb.h |  4
-rw-r--r--  mm/hugetlb.c            | 62
-rw-r--r--  mm/memory.c             | 14
3 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 16cdb75a543a..e7f0fabfa1c2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
 int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
+int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
+			struct page **, struct vm_area_struct **,
+			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
 void __unmap_hugepage_range(struct vm_area_struct *,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c001f846f17d..6b41f70bbc7f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
 	return find_lock_page(mapping, idx);
 }
 
+/* Return whether there is a pagecache page to back given address within VMA */
+static bool hugetlbfs_backed(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct address_space *mapping;
+	pgoff_t idx;
+	struct page *page;
+
+	mapping = vma->vm_file->f_mapping;
+	idx = vma_hugecache_offset(h, vma, address);
+
+	page = find_get_page(mapping, idx);
+	if (page)
+		put_page(page);
+	return page != NULL;
+}
+
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, unsigned int flags)
 {
@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
-static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
-{
-	if (!ptep || write || shared)
-		return 0;
-	else
-		return huge_pte_none(huge_ptep_get(ptep));
-}
-
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i,
-			int write)
+			unsigned int flags)
 {
 	unsigned long pfn_offset;
 	unsigned long vaddr = *position;
 	int remainder = *length;
 	struct hstate *h = hstate_vma(vma);
-	int zeropage_ok = 0;
-	int shared = vma->vm_flags & VM_SHARED;
 
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
+		int absent;
 		struct page *page;
 
 		/*
 		 * Some archs (sparc64, sh*) have multiple pte_ts to
-		 * each hugepage. We have to make * sure we get the
+		 * each hugepage. We have to make sure we get the
 		 * first, for the page indexing below to work.
 		 */
 		pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
-		if (huge_zeropage_ok(pte, write, shared))
-			zeropage_ok = 1;
+		absent = !pte || huge_pte_none(huge_ptep_get(pte));
+
+		/*
+		 * When coredumping, it suits get_dump_page if we just return
+		 * an error if there's a hole and no huge pagecache to back it.
+		 */
+		if (absent &&
+		    ((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
+			remainder = 0;
+			break;
+		}
 
-		if (!pte ||
-		    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
-		    (write && !pte_write(huge_ptep_get(pte)))) {
+		if (absent ||
+		    ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
 			spin_unlock(&mm->page_table_lock);
-			ret = hugetlb_fault(mm, vma, vaddr, write);
+			ret = hugetlb_fault(mm, vma, vaddr,
+				(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
 			spin_lock(&mm->page_table_lock);
 			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
 			remainder = 0;
-			if (!i)
-				i = -EFAULT;
 			break;
 		}
 
@@ -2266,10 +2281,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
-			if (zeropage_ok)
-				pages[i] = ZERO_PAGE(0);
-			else
-				pages[i] = mem_map_offset(page, pfn_offset);
+			pages[i] = mem_map_offset(page, pfn_offset);
 			get_page(pages[i]);
 		}
 
@@ -2293,7 +2305,7 @@ same_page:
 	*length = remainder;
 	*position = vaddr;
 
-	return i;
+	return i ? i : -EFAULT;
 }
 
 void hugetlb_change_protection(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c
index 532a55bce6a4..6359a4f80c4a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		    !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
-		if (is_vm_hugetlb_page(vma)) {
-			i = follow_hugetlb_page(mm, vma, pages, vmas,
-					&start, &nr_pages, i, write);
-			continue;
-		}
-
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
 		if (flags & GUP_FLAGS_DUMP)
 			foll_flags |= FOLL_DUMP;
+		if (write)
+			foll_flags |= FOLL_WRITE;
+
+		if (is_vm_hugetlb_page(vma)) {
+			i = follow_hugetlb_page(mm, vma, pages, vmas,
+					&start, &nr_pages, i, foll_flags);
+			continue;
+		}
 
 		do {
 			struct page *page;
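
The new "return i ? i : -EFAULT;" above matches the convention already
used by __get_user_pages() ("return i ? : -EFAULT;", with GCC's elided
middle operand): report partial progress whenever some pages were
processed, and return an error only when nothing was. A minimal
standalone sketch of that convention (illustrative only; make_present()
is a hypothetical stand-in for the per-page fault/lookup work):

	#include <errno.h>
	#include <stdbool.h>

	/* Hypothetical stand-in for faulting in or looking up one page. */
	static bool make_present(int idx)
	{
		return idx < 3;	/* pretend only the first 3 pages succeed */
	}

	static int process_pages(int npages)
	{
		int i;

		for (i = 0; i < npages; i++)
			if (!make_present(i))
				break;
		/* Partial progress beats an error code: 0 done -> -EFAULT. */
		return i ? i : -EFAULT;
	}

A caller treats a positive return as pages to consume before retrying;
only a return with no progress at all is a hard failure.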