Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	194
1 files changed, 173 insertions, 21 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..601159a46ab6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/writeback.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1226,7 +1227,12 @@ out:
 	return retval;
 }
 
-/*
+/**
+ * vm_insert_page - insert single page into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @page: source kernel page
+ *
  * This allows drivers to insert individual pages they've allocated
  * into a user vma.
  *
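
For context, a minimal sketch of how a driver's mmap handler might use vm_insert_page(); the handler and page names (my_dev_mmap, my_page) are hypothetical and assumed to be set up elsewhere, not part of this patch:

#include <linux/fs.h>
#include <linux/mm.h>

static struct page *my_page;	/* assumed: allocated earlier with alloc_page(GFP_KERNEL) */

/* Hypothetical mmap handler: insert the single kernel page at the start of the vma. */
static int my_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (vma->vm_end - vma->vm_start < PAGE_SIZE)
		return -EINVAL;
	return vm_insert_page(vma, vma->vm_start, my_page);
}
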
@@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
-/* Note: this is only safe if the mm semaphore is held when called. */
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		    unsigned long pfn, unsigned long size, pgprot_t prot)
 {
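
Likewise, a hedged sketch of the typical caller: a driver mmap handler mapping a physical buffer into userspace (my_dev_phys is assumed); the mmap path already holds the mm semaphore as the note above requires:

#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long my_dev_phys;	/* assumed: physical base address of the device buffer */

/* Hypothetical mmap handler built on remap_pfn_range(). */
static int my_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	return remap_pfn_range(vma, vma->vm_start,
			       my_dev_phys >> PAGE_SHIFT, size,
			       vma->vm_page_prot);
}
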
@@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Take out anonymous pages first, anonymous shared vmas are
+	 * not dirty accountable.
+	 */
+	if (PageAnon(old_page)) {
+		if (!TestSetPageLocked(old_page)) {
+			reuse = can_share_swap_page(old_page);
+			unlock_page(old_page);
+		}
+	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+					(VM_WRITE|VM_SHARED))) {
+		/*
+		 * Only catch write-faults on shared writable pages,
+		 * read-only shared pages can get COWed by
+		 * get_user_pages(.write=1, .force=1).
+		 */
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
-	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
-		reuse = can_share_swap_page(old_page);
-		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1592,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty_balance(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
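
The write-fault path above only calls ->page_mkwrite for shared writable mappings. A rough sketch of what a filesystem providing that hook might look like (the myfs_* names are hypothetical; the hook signature is the (vma, page) form this code calls):

#include <linux/mm.h>

/* Hypothetical ->page_mkwrite hook: called before a shared read-only page
 * is made writable, e.g. to reserve backing blocks; nonzero means refuse. */
static int myfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	/* filesystem-specific preparation would go here */
	return 0;
}

static struct vm_operations_struct myfs_file_vm_ops = {
	.nopage		= filemap_nopage,
	.page_mkwrite	= myfs_page_mkwrite,
};
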
@@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
  *
  * NOTE! We have to be ready to update the memory sharing
  * between the file and the memory map for a potential last
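
As a usage sketch (myfs_setsize is hypothetical; real callers normally reach vmtruncate() via inode_setattr() when ATTR_SIZE is set):

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical helper: unmap and drop page cache beyond newsize,
 * then let the filesystem free its own blocks. */
static int myfs_setsize(struct inode *inode, loff_t newsize)
{
	int err = vmtruncate(inode, newsize);

	if (err)
		return err;
	/* filesystem-specific block truncation would follow here */
	return 0;
}
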
@@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 }
 EXPORT_UNUSED_SYMBOL(vmtruncate_range);  /* June 2006 */
 
-/*
+/**
+ * swapin_readahead - swap in pages in hope we need them soon
+ * @entry: swap entry of this memory
+ * @addr: address to start
+ * @vma: user vma this addresses belong to
+ *
  * Primitive swap readahead code. We simply read an aligned block of
  * (1 << page_cluster) entries in the swap area. This method is chosen
  * because it doesn't cost us any seek time.  We also make sure to queue
  * the 'original' request together with the readahead ones...
  *
  * This has been extended to use the NUMA policies from the mm triggering
  * the readahead.
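
To make the "aligned block" wording concrete, an illustration-only helper (not part of the patch) computing the first swap slot of the window the comment describes:

#include <linux/swap.h>
#include <linux/swapops.h>

/* Illustration only: with page_cluster == 3 the window spans 8 slots,
 * so a faulting offset of 21 maps to the block starting at 16. */
static unsigned long readahead_block_start(swp_entry_t entry)
{
	return swp_offset(entry) & ~((1UL << page_cluster) - 1);
}
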
@@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2229,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2245,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty_balance(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);
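
set_page_dirty_balance() itself is not in this file; roughly, per the companion mm/page-writeback.c change, it is expected to behave like the sketch below: dirty the page and, when that actually transitioned it to dirty, throttle the dirtier.

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

/* Rough sketch of the helper's expected behaviour (see mm/page-writeback.c). */
void set_page_dirty_balance(struct page *page)
{
	if (set_page_dirty(page)) {
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			balance_dirty_pages_ratelimited(mapping);
	}
}
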
@@ -2211,6 +2256,54 @@ oom:
 }
 
 /*
+ * do_no_pfn() tries to create a new page mapping for a page without
+ * a struct_page backing it
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * It is expected that the ->nopfn handler always returns the same pfn
+ * for a given virtual mapping.
+ *
+ * Mark this `noinline' to prevent it from bloating the main pagefault code.
+ */
+static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long address, pte_t *page_table, pmd_t *pmd,
+		     int write_access)
+{
+	spinlock_t *ptl;
+	pte_t entry;
+	unsigned long pfn;
+	int ret = VM_FAULT_MINOR;
+
+	pte_unmap(page_table);
+	BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+	BUG_ON(is_cow_mapping(vma->vm_flags));
+
+	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
+	if (pfn == NOPFN_OOM)
+		return VM_FAULT_OOM;
+	if (pfn == NOPFN_SIGBUS)
+		return VM_FAULT_SIGBUS;
+
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		entry = pfn_pte(pfn, vma->vm_page_prot);
+		if (write_access)
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		set_pte_at(mm, address, page_table, entry);
+	}
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+}
+
+/*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
  * nonlinear vmas.
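
A hedged sketch of the counterpart a driver would supply: a ->nopfn handler for a VM_PFNMAP vma that returns the same pfn for a given address on every call, as required above (my_dev_base_pfn and MY_DEV_NR_PAGES are assumed, not part of this patch):

#include <linux/mm.h>

#define MY_DEV_NR_PAGES	16		/* assumed size of the mapped region, in pages */
static unsigned long my_dev_base_pfn;	/* assumed: first pfn of the device region */

/* Hypothetical ->nopfn handler; the vma must have VM_PFNMAP set. */
static unsigned long my_dev_nopfn(struct vm_area_struct *vma,
				  unsigned long address)
{
	unsigned long pgoff = (address - vma->vm_start) >> PAGE_SHIFT;

	if (pgoff >= MY_DEV_NR_PAGES)
		return NOPFN_SIGBUS;
	return my_dev_base_pfn + pgoff;
}

static struct vm_operations_struct my_dev_vm_ops = {
	.nopfn	= my_dev_nopfn,
};
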
@@ -2272,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (!vma->vm_ops || !vma->vm_ops->nopage)
-				return do_anonymous_page(mm, vma, address,
-					pte, pmd, write_access);
-			return do_no_page(mm, vma, address,
-					pte, pmd, write_access);
+			if (vma->vm_ops) {
+				if (vma->vm_ops->nopage)
+					return do_no_page(mm, vma, address,
+							  pte, pmd,
+							  write_access);
+				if (unlikely(vma->vm_ops->nopfn))
+					return do_no_pfn(mm, vma, address, pte,
+							 pmd, write_access);
+			}
+			return do_anonymous_page(mm, vma, address,
+						 pte, pmd, write_access);
 		}
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address,
@@ -2505,3 +2604,56 @@ int in_gate_area_no_task(unsigned long addr)
 }
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
+
+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct page *page;
+	void *old_buf = buf;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	down_read(&mm->mmap_sem);
+	/* ignore errors, just check how much was sucessfully transfered */
+	while (len) {
+		int bytes, ret, offset;
+		void *maddr;
+
+		ret = get_user_pages(tsk, mm, addr, 1,
+				write, 1, &page, &vma);
+		if (ret <= 0)
+			break;
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+		maddr = kmap(page);
+		if (write) {
+			copy_to_user_page(vma, page, addr,
+					  maddr + offset, buf, bytes);
+			set_page_dirty_lock(page);
+		} else {
+			copy_from_user_page(vma, page, addr,
+					    buf, maddr + offset, bytes);
+		}
+		kunmap(page);
+		page_cache_release(page);
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+	}
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+
+	return buf - old_buf;
+}
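
A hedged sketch of a caller, roughly the pattern the ptrace peek path uses: copy one word from another task into a kernel buffer and treat a short read as an error (peek_remote_long is hypothetical):

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched.h>

/* Hypothetical caller: read sizeof(long) bytes from tsk's address space. */
static int peek_remote_long(struct task_struct *tsk, unsigned long addr,
			    unsigned long *val)
{
	int copied = access_process_vm(tsk, addr, val, sizeof(*val), 0);

	return copied == sizeof(*val) ? 0 : -EIO;
}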