Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  194
1 files changed, 173 insertions, 21 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..601159a46ab6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/writeback.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1226,7 +1227,12 @@ out:
 	return retval;
 }
 
-/*
+/**
+ * vm_insert_page - insert single page into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @page: source kernel page
+ *
  * This allows drivers to insert individual pages they've allocated
  * into a user vma.
  *
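The vm_insert_page() interface documented above is typically called from a driver's mmap handler, mapping a page the driver itself allocated. A minimal sketch follows (not part of this patch; drv_page and drv_mmap are made-up names, and the page is assumed to have been obtained with alloc_page() at initialization time):

#include <linux/fs.h>
#include <linux/mm.h>

static struct page *drv_page;	/* assumed: allocated with alloc_page(GFP_KERNEL) */

static int drv_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (vma->vm_end - vma->vm_start < PAGE_SIZE)
		return -EINVAL;

	/* insert the driver-owned page at the start of the user vma */
	return vm_insert_page(vma, vma->vm_start, drv_page);
}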
@@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
-/* Note: this is only safe if the mm semaphore is held when called. */
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		    unsigned long pfn, unsigned long size, pgprot_t prot)
 {
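Note that, despite the @pfn wording above, the third argument is a page frame number, i.e. a physical address shifted right by PAGE_SHIFT. A typical caller is a driver mmap handler, which already runs with mmap_sem held as the comment requires. A minimal sketch, with DRV_PHYS_BASE and drv_mmap as made-up names:

#define DRV_PHYS_BASE	0xfd000000UL	/* hypothetical device memory base */

static int drv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	if (remap_pfn_range(vma, vma->vm_start, DRV_PHYS_BASE >> PAGE_SHIFT,
			    size, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}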
@@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Take out anonymous pages first, anonymous shared vmas are
+	 * not dirty accountable.
+	 */
+	if (PageAnon(old_page)) {
+		if (!TestSetPageLocked(old_page)) {
+			reuse = can_share_swap_page(old_page);
+			unlock_page(old_page);
+		}
+	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+					(VM_WRITE|VM_SHARED))) {
+		/*
+		 * Only catch write-faults on shared writable pages,
+		 * read-only shared pages can get COWed by
+		 * get_user_pages(.write=1, .force=1).
+		 */
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
-	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
-		reuse = can_share_swap_page(old_page);
-		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1592,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty_balance(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
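The ->page_mkwrite callback consulted in this function lets a filesystem or driver prepare for (or refuse) a shared page becoming writable before the dirty tracking added above kicks in. As a rough sketch only, using made-up names and the callback signature of this kernel generation:

static int example_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	/* e.g. reserve blocks so that later writeback of this page cannot fail */
	return 0;	/* a negative return makes the write fault fail */
}

static struct vm_operations_struct example_vm_ops = {
	.nopage		= filemap_nopage,
	.page_mkwrite	= example_page_mkwrite,
};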
@@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
  *
  * NOTE! We have to be ready to update the memory sharing
  * between the file and the memory map for a potential last
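For context, vmtruncate() is normally reached from a filesystem's ->setattr path (commonly via inode_setattr()) when a size change is requested. A sketch with made-up names, calling it directly for illustration:

static int example_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;

	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		int error = vmtruncate(inode, attr->ia_size);
		if (error)
			return error;
	}
	return 0;
}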
@@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 }
 EXPORT_UNUSED_SYMBOL(vmtruncate_range);  /* June 2006 */
 
-/*
+/**
+ * swapin_readahead - swap in pages in hope we need them soon
+ * @entry: swap entry of this memory
+ * @addr: address to start
+ * @vma: user vma this addresses belong to
+ *
  * Primitive swap readahead code. We simply read an aligned block of
  * (1 << page_cluster) entries in the swap area. This method is chosen
  * because it doesn't cost us any seek time. We also make sure to queue
  * the 'original' request together with the readahead ones...
  *
  * This has been extended to use the NUMA policies from the mm triggering
  * the readahead.
@@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2229,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2245,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty_balance(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);
@@ -2211,6 +2256,54 @@ oom:
 }
 
 /*
+ * do_no_pfn() tries to create a new page mapping for a page without
+ * a struct_page backing it
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * It is expected that the ->nopfn handler always returns the same pfn
+ * for a given virtual mapping.
+ *
+ * Mark this `noinline' to prevent it from bloating the main pagefault code.
+ */
+static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long address, pte_t *page_table, pmd_t *pmd,
+		     int write_access)
+{
+	spinlock_t *ptl;
+	pte_t entry;
+	unsigned long pfn;
+	int ret = VM_FAULT_MINOR;
+
+	pte_unmap(page_table);
+	BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+	BUG_ON(is_cow_mapping(vma->vm_flags));
+
+	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
+	if (pfn == NOPFN_OOM)
+		return VM_FAULT_OOM;
+	if (pfn == NOPFN_SIGBUS)
+		return VM_FAULT_SIGBUS;
+
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		entry = pfn_pte(pfn, vma->vm_page_prot);
+		if (write_access)
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		set_pte_at(mm, address, page_table, entry);
+	}
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+}
+
+/*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
  * nonlinear vmas.
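A driver that wants this path implements the new ->nopfn method and marks its vma VM_PFNMAP (and non-COW, typically by requiring a shared mapping) from its mmap handler. A minimal sketch, with DRV_PHYS_BASE, DRV_REGION_SIZE and the drv_* names made up:

static unsigned long drv_nopfn(struct vm_area_struct *vma,
			       unsigned long address)
{
	unsigned long offset = address - vma->vm_start;

	if (offset >= DRV_REGION_SIZE)
		return NOPFN_SIGBUS;

	/* must be stable: the same address always yields the same pfn */
	return (DRV_PHYS_BASE + offset) >> PAGE_SHIFT;
}

static struct vm_operations_struct drv_vm_ops = {
	.nopfn	= drv_nopfn,
};

static int drv_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_PFNMAP;	/* required by the BUG_ON in do_no_pfn() */
	vma->vm_ops = &drv_vm_ops;
	return 0;
}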
@@ -2272,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (!vma->vm_ops || !vma->vm_ops->nopage)
-				return do_anonymous_page(mm, vma, address,
-					pte, pmd, write_access);
-			return do_no_page(mm, vma, address,
-					pte, pmd, write_access);
+			if (vma->vm_ops) {
+				if (vma->vm_ops->nopage)
+					return do_no_page(mm, vma, address,
+							  pte, pmd,
+							  write_access);
+				if (unlikely(vma->vm_ops->nopfn))
+					return do_no_pfn(mm, vma, address, pte,
+							 pmd, write_access);
+			}
+			return do_anonymous_page(mm, vma, address,
+						 pte, pmd, write_access);
 		}
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address,
@@ -2505,3 +2604,56 @@ int in_gate_area_no_task(unsigned long addr)
 }
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
+
+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct page *page;
+	void *old_buf = buf;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	down_read(&mm->mmap_sem);
+	/* ignore errors, just check how much was sucessfully transfered */
+	while (len) {
+		int bytes, ret, offset;
+		void *maddr;
+
+		ret = get_user_pages(tsk, mm, addr, 1,
+				write, 1, &page, &vma);
+		if (ret <= 0)
+			break;
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+		maddr = kmap(page);
+		if (write) {
+			copy_to_user_page(vma, page, addr,
+					  maddr + offset, buf, bytes);
+			set_page_dirty_lock(page);
+		} else {
+			copy_from_user_page(vma, page, addr,
+					    buf, maddr + offset, bytes);
+		}
+		kunmap(page);
+		page_cache_release(page);
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+	}
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+
+	return buf - old_buf;
+}
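Callers such as the ptrace code read or write another task's memory through this helper and check the returned byte count. A usage sketch (example_peek is a made-up wrapper):

static int example_peek(struct task_struct *child, unsigned long addr,
			unsigned long *val)
{
	int copied = access_process_vm(child, addr, val, sizeof(*val), 0);

	/* access_process_vm() returns the number of bytes actually copied */
	return (copied == sizeof(*val)) ? 0 : -EIO;
}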