aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Hugh Dickins <hugh.dickins@tiscali.co.uk> 2009-09-21 20:03:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-22 10:17:40 -0400
commit: 2a15efc953b26ad57d7d38b9e6782d57e53b4ab2 (patch)
tree: f4d04903b3303e80460d2fa3f38da2b7eea82d22
parent: 8e4b9a60718970bbc02dfd3abd0b956ab65af231 (diff)
mm: follow_hugetlb_page flags
follow_hugetlb_page() shouldn't be guessing about the coredump case either: pass the foll_flags down to it, instead of just the write bit.

Remove that obscure huge_zeropage_ok() test. The decision is easy, though unlike the non-huge case - here vm_ops->fault is always set. But we know that a fault would serve up zeroes, unless there's already a hugetlbfs pagecache page to back the range.

(Alternatively, since hugetlb pages aren't swapped out under pressure, you could save more dump space by arguing that a page not yet faulted into this process cannot be relevant to the dump; but that would be more surprising.)

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/hugetlb.h   4
-rw-r--r--  mm/hugetlb.c             62
-rw-r--r--  mm/memory.c              14
3 files changed, 48 insertions, 32 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 16cdb75a543a..e7f0fabfa1c2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
24int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); 24int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
25int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); 25int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
26int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); 26int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
27int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int); 27int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
28 struct page **, struct vm_area_struct **,
29 unsigned long *, int *, int, unsigned int flags);
28void unmap_hugepage_range(struct vm_area_struct *, 30void unmap_hugepage_range(struct vm_area_struct *,
29 unsigned long, unsigned long, struct page *); 31 unsigned long, unsigned long, struct page *);
30void __unmap_hugepage_range(struct vm_area_struct *, 32void __unmap_hugepage_range(struct vm_area_struct *,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c001f846f17d..6b41f70bbc7f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
2016 return find_lock_page(mapping, idx); 2016 return find_lock_page(mapping, idx);
2017} 2017}
2018 2018
2019/* Return whether there is a pagecache page to back given address within VMA */
2020static bool hugetlbfs_backed(struct hstate *h,
2021 struct vm_area_struct *vma, unsigned long address)
2022{
2023 struct address_space *mapping;
2024 pgoff_t idx;
2025 struct page *page;
2026
2027 mapping = vma->vm_file->f_mapping;
2028 idx = vma_hugecache_offset(h, vma, address);
2029
2030 page = find_get_page(mapping, idx);
2031 if (page)
2032 put_page(page);
2033 return page != NULL;
2034}
2035
2019static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, 2036static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2020 unsigned long address, pte_t *ptep, unsigned int flags) 2037 unsigned long address, pte_t *ptep, unsigned int flags)
2021{ 2038{
@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
2211 return NULL; 2228 return NULL;
2212} 2229}
2213 2230
2214static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
2215{
2216 if (!ptep || write || shared)
2217 return 0;
2218 else
2219 return huge_pte_none(huge_ptep_get(ptep));
2220}
2221
2222int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, 2231int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2223 struct page **pages, struct vm_area_struct **vmas, 2232 struct page **pages, struct vm_area_struct **vmas,
2224 unsigned long *position, int *length, int i, 2233 unsigned long *position, int *length, int i,
2225 int write) 2234 unsigned int flags)
2226{ 2235{
2227 unsigned long pfn_offset; 2236 unsigned long pfn_offset;
2228 unsigned long vaddr = *position; 2237 unsigned long vaddr = *position;
2229 int remainder = *length; 2238 int remainder = *length;
2230 struct hstate *h = hstate_vma(vma); 2239 struct hstate *h = hstate_vma(vma);
2231 int zeropage_ok = 0;
2232 int shared = vma->vm_flags & VM_SHARED;
2233 2240
2234 spin_lock(&mm->page_table_lock); 2241 spin_lock(&mm->page_table_lock);
2235 while (vaddr < vma->vm_end && remainder) { 2242 while (vaddr < vma->vm_end && remainder) {
2236 pte_t *pte; 2243 pte_t *pte;
2244 int absent;
2237 struct page *page; 2245 struct page *page;
2238 2246
2239 /* 2247 /*
2240 * Some archs (sparc64, sh*) have multiple pte_ts to 2248 * Some archs (sparc64, sh*) have multiple pte_ts to
2241 * each hugepage. We have to make * sure we get the 2249 * each hugepage. We have to make sure we get the
2242 * first, for the page indexing below to work. 2250 * first, for the page indexing below to work.
2243 */ 2251 */
2244 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); 2252 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
2245 if (huge_zeropage_ok(pte, write, shared)) 2253 absent = !pte || huge_pte_none(huge_ptep_get(pte));
2246 zeropage_ok = 1; 2254
2255 /*
2256 * When coredumping, it suits get_dump_page if we just return
2257 * an error if there's a hole and no huge pagecache to back it.
2258 */
2259 if (absent &&
2260 ((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
2261 remainder = 0;
2262 break;
2263 }
2247 2264
2248 if (!pte || 2265 if (absent ||
2249 (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) || 2266 ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
2250 (write && !pte_write(huge_ptep_get(pte)))) {
2251 int ret; 2267 int ret;
2252 2268
2253 spin_unlock(&mm->page_table_lock); 2269 spin_unlock(&mm->page_table_lock);
2254 ret = hugetlb_fault(mm, vma, vaddr, write); 2270 ret = hugetlb_fault(mm, vma, vaddr,
2271 (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
2255 spin_lock(&mm->page_table_lock); 2272 spin_lock(&mm->page_table_lock);
2256 if (!(ret & VM_FAULT_ERROR)) 2273 if (!(ret & VM_FAULT_ERROR))
2257 continue; 2274 continue;
2258 2275
2259 remainder = 0; 2276 remainder = 0;
2260 if (!i)
2261 i = -EFAULT;
2262 break; 2277 break;
2263 } 2278 }
2264 2279
@@ -2266,10 +2281,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2266 page = pte_page(huge_ptep_get(pte)); 2281 page = pte_page(huge_ptep_get(pte));
2267same_page: 2282same_page:
2268 if (pages) { 2283 if (pages) {
2269 if (zeropage_ok) 2284 pages[i] = mem_map_offset(page, pfn_offset);
2270 pages[i] = ZERO_PAGE(0);
2271 else
2272 pages[i] = mem_map_offset(page, pfn_offset);
2273 get_page(pages[i]); 2285 get_page(pages[i]);
2274 } 2286 }
2275 2287
@@ -2293,7 +2305,7 @@ same_page:
2293 *length = remainder; 2305 *length = remainder;
2294 *position = vaddr; 2306 *position = vaddr;
2295 2307
2296 return i; 2308 return i ? i : -EFAULT;
2297} 2309}
2298 2310
2299void hugetlb_change_protection(struct vm_area_struct *vma, 2311void hugetlb_change_protection(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c
index 532a55bce6a4..6359a4f80c4a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1260 !(vm_flags & vma->vm_flags)) 1260 !(vm_flags & vma->vm_flags))
1261 return i ? : -EFAULT; 1261 return i ? : -EFAULT;
1262 1262
1263 if (is_vm_hugetlb_page(vma)) {
1264 i = follow_hugetlb_page(mm, vma, pages, vmas,
1265 &start, &nr_pages, i, write);
1266 continue;
1267 }
1268
1269 foll_flags = FOLL_TOUCH; 1263 foll_flags = FOLL_TOUCH;
1270 if (pages) 1264 if (pages)
1271 foll_flags |= FOLL_GET; 1265 foll_flags |= FOLL_GET;
1272 if (flags & GUP_FLAGS_DUMP) 1266 if (flags & GUP_FLAGS_DUMP)
1273 foll_flags |= FOLL_DUMP; 1267 foll_flags |= FOLL_DUMP;
1268 if (write)
1269 foll_flags |= FOLL_WRITE;
1270
1271 if (is_vm_hugetlb_page(vma)) {
1272 i = follow_hugetlb_page(mm, vma, pages, vmas,
1273 &start, &nr_pages, i, foll_flags);
1274 continue;
1275 }
1274 1276
1275 do { 1277 do {
1276 struct page *page; 1278 struct page *page;