aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJes Sorensen <jes@sgi.com>2006-09-27 04:50:10 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-09-27 11:26:13 -0400
commitf4b81804a2d1ab341a4613089dc31ecce0800ed8 (patch)
treefdad77a7c40790908546a74a9b5918400d01333d
parent5d2923436217ba8bd05c5ee157712a391891c382 (diff)
[PATCH] do_no_pfn()
Implement do_no_pfn() for handling mapping of memory without a struct page backing it. This avoids creating fake page table entries for regions which are not backed by real memory. This feature is used by the MSPEC driver and other users, where it is highly undesirable to have a struct page sitting behind the page (for instance if the page is accessed in cached mode via the struct page in parallel to the driver accessing it uncached, which can result in data corruption on some architectures, such as ia64). This version uses specific NOPFN_{SIGBUS,OOM} return values, rather than expecting all negative pfn values to be errors. It also bugs on cow mappings as this would not work with the VM. [akpm@osdl.org: micro-optimise] Signed-off-by: Jes Sorensen <jes@sgi.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/mm.h7
-rw-r--r--mm/memory.c64
2 files changed, 66 insertions, 5 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e433bbc6e7e..22165cb18906 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -199,6 +199,7 @@ struct vm_operations_struct {
199 void (*open)(struct vm_area_struct * area); 199 void (*open)(struct vm_area_struct * area);
200 void (*close)(struct vm_area_struct * area); 200 void (*close)(struct vm_area_struct * area);
201 struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); 201 struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
202 unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
202 int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); 203 int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
203 204
204 /* notification that a previously read-only page is about to become 205 /* notification that a previously read-only page is about to become
@@ -594,6 +595,12 @@ static inline int page_mapped(struct page *page)
594#define NOPAGE_OOM ((struct page *) (-1)) 595#define NOPAGE_OOM ((struct page *) (-1))
595 596
596/* 597/*
598 * Error return values for the *_nopfn functions
599 */
600#define NOPFN_SIGBUS ((unsigned long) -1)
601#define NOPFN_OOM ((unsigned long) -2)
602
603/*
597 * Different kinds of faults, as returned by handle_mm_fault(). 604 * Different kinds of faults, as returned by handle_mm_fault().
598 * Used to decide whether a process gets delivered SIGBUS or 605 * Used to decide whether a process gets delivered SIGBUS or
599 * just gets major/minor fault counters bumped up. 606 * just gets major/minor fault counters bumped up.
diff --git a/mm/memory.c b/mm/memory.c
index 92a3ebd8d795..f2ef1dcfff77 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2256,6 +2256,54 @@ oom:
2256} 2256}
2257 2257
2258/* 2258/*
2259 * do_no_pfn() tries to create a new page mapping for a page without
2260 * a struct page backing it
2261 *
2262 * As this is called only for pages that do not currently exist, we
2263 * do not need to flush old virtual caches or the TLB.
2264 *
2265 * We enter with non-exclusive mmap_sem (to exclude vma changes,
2266 * but allow concurrent faults), and pte mapped but not yet locked.
2267 * We return with mmap_sem still held, but pte unmapped and unlocked.
2268 *
2269 * It is expected that the ->nopfn handler always returns the same pfn
2270 * for a given virtual mapping.
2271 *
2272 * Mark this `noinline' to prevent it from bloating the main pagefault code.
2273 */
2274static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
2275 unsigned long address, pte_t *page_table, pmd_t *pmd,
2276 int write_access)
2277{
2278 spinlock_t *ptl;
2279 pte_t entry;
2280 unsigned long pfn;
2281 int ret = VM_FAULT_MINOR;
2282
2283 pte_unmap(page_table);
2284 BUG_ON(!(vma->vm_flags & VM_PFNMAP));
2285 BUG_ON(is_cow_mapping(vma->vm_flags));
2286
2287 pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
2288 if (pfn == NOPFN_OOM)
2289 return VM_FAULT_OOM;
2290 if (pfn == NOPFN_SIGBUS)
2291 return VM_FAULT_SIGBUS;
2292
2293 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2294
2295 /* Only go through if we didn't race with anybody else... */
2296 if (pte_none(*page_table)) {
2297 entry = pfn_pte(pfn, vma->vm_page_prot);
2298 if (write_access)
2299 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2300 set_pte_at(mm, address, page_table, entry);
2301 }
2302 pte_unmap_unlock(page_table, ptl);
2303 return ret;
2304}
2305
2306/*
2259 * Fault of a previously existing named mapping. Repopulate the pte 2307 * Fault of a previously existing named mapping. Repopulate the pte
2260 * from the encoded file_pte if possible. This enables swappable 2308 * from the encoded file_pte if possible. This enables swappable
2261 * nonlinear vmas. 2309 * nonlinear vmas.
@@ -2317,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm,
2317 old_entry = entry = *pte; 2365 old_entry = entry = *pte;
2318 if (!pte_present(entry)) { 2366 if (!pte_present(entry)) {
2319 if (pte_none(entry)) { 2367 if (pte_none(entry)) {
2320 if (!vma->vm_ops || !vma->vm_ops->nopage) 2368 if (vma->vm_ops) {
2321 return do_anonymous_page(mm, vma, address, 2369 if (vma->vm_ops->nopage)
2322 pte, pmd, write_access); 2370 return do_no_page(mm, vma, address,
2323 return do_no_page(mm, vma, address, 2371 pte, pmd,
2324 pte, pmd, write_access); 2372 write_access);
2373 if (unlikely(vma->vm_ops->nopfn))
2374 return do_no_pfn(mm, vma, address, pte,
2375 pmd, write_access);
2376 }
2377 return do_anonymous_page(mm, vma, address,
2378 pte, pmd, write_access);
2325 } 2379 }
2326 if (pte_file(entry)) 2380 if (pte_file(entry))
2327 return do_file_page(mm, vma, address, 2381 return do_file_page(mm, vma, address,