Diffstat (limited to 'mm/memory.c')
 -rw-r--r--  mm/memory.c |  194
 1 file changed, 173 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..601159a46ab6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/writeback.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1226,7 +1227,12 @@ out:
 	return retval;
 }
 
-/*
+/**
+ * vm_insert_page - insert single page into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @page: source kernel page
+ *
  * This allows drivers to insert individual pages they've allocated
  * into a user vma.
  *
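For illustration, a driver would typically call vm_insert_page() from its mmap file operation. This is a minimal sketch under stated assumptions: my_mmap and my_buf are hypothetical names, with my_buf assumed to be a page-aligned kernel buffer the driver allocated.

#include <linux/mm.h>

/* hypothetical page-aligned kernel buffer, e.g. from __get_free_page() */
extern void *my_buf;

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* vm_insert_page() takes its own reference on the page */
	return vm_insert_page(vma, vma->vm_start, virt_to_page(my_buf));
}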
@@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
-/* Note: this is only safe if the mm semaphore is held when called. */
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		    unsigned long pfn, unsigned long size, pgprot_t prot)
 {
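A typical caller of the newly documented remap_pfn_range() is again a driver's mmap handler; a hedged sketch follows, where my_mmap is hypothetical and my_phys_base stands in for a device-specific physical base address.

#include <linux/mm.h>

extern unsigned long my_phys_base;	/* assumed physical base of the region */

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* map the whole requested range, converting the base to a pfn */
	if (remap_pfn_range(vma, vma->vm_start, my_phys_base >> PAGE_SHIFT,
			    size, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}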
@@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Take out anonymous pages first, anonymous shared vmas are
+	 * not dirty accountable.
+	 */
+	if (PageAnon(old_page)) {
+		if (!TestSetPageLocked(old_page)) {
+			reuse = can_share_swap_page(old_page);
+			unlock_page(old_page);
+		}
+	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+					(VM_WRITE|VM_SHARED))) {
+		/*
+		 * Only catch write-faults on shared writable pages,
+		 * read-only shared pages can get COWed by
+		 * get_user_pages(.write=1, .force=1).
+		 */
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
-	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
-		reuse = can_share_swap_page(old_page);
-		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1592,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty_balance(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
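The hunks above make do_wp_page() call ->page_mkwrite before a shared writable page is made writable, and balance dirty pages afterwards via set_page_dirty_balance(). A filesystem wanting that notification would supply a handler along these lines; this is a sketch only, with my_page_mkwrite and my_vm_ops as hypothetical names.

static int my_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	/*
	 * Called before the pte is made writable; a negative return
	 * makes the fault fail (VM_FAULT_SIGBUS in do_wp_page above).
	 * A filesystem would typically reserve backing blocks here.
	 */
	return 0;
}

static struct vm_operations_struct my_vm_ops = {
	.nopage		= filemap_nopage,
	.page_mkwrite	= my_page_mkwrite,
};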
@@ -1785,9 +1815,10 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
  *
  * NOTE! We have to be ready to update the memory sharing
  * between the file and the memory map for a potential last
@@ -1856,11 +1887,16 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 }
 EXPORT_UNUSED_SYMBOL(vmtruncate_range);  /* June 2006 */
 
-/*
+/**
+ * swapin_readahead - swap in pages in hope we need them soon
+ * @entry: swap entry of this memory
+ * @addr: address to start
+ * @vma: user vma these addresses belong to
+ *
  * Primitive swap readahead code. We simply read an aligned block of
  * (1 << page_cluster) entries in the swap area. This method is chosen
  * because it doesn't cost us any seek time. We also make sure to queue
  * the 'original' request together with the readahead ones...
  *
  * This has been extended to use the NUMA policies from the mm triggering
  * the readahead.
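The "aligned block of (1 << page_cluster) entries" means the window starts at the faulting entry's swap offset rounded down to a cluster boundary. A sketch of that arithmetic only, not the kernel's exact code:

/* with page_cluster = 3, a cluster is 8 swap entries */
unsigned long cluster = 1UL << page_cluster;
unsigned long start   = swp_offset(entry) & ~(cluster - 1);
/* entries [start, start + cluster) are read, the original included */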
@@ -2098,6 +2134,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2229,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2245,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty_balance(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);
@@ -2211,6 +2256,54 @@ oom:
 }
 
 /*
+ * do_no_pfn() tries to create a new page mapping for a page without
+ * a struct page backing it
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * It is expected that the ->nopfn handler always returns the same pfn
+ * for a given virtual mapping.
+ *
+ * Mark this `noinline' to prevent it from bloating the main pagefault code.
+ */
+static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long address, pte_t *page_table, pmd_t *pmd,
+		     int write_access)
+{
+	spinlock_t *ptl;
+	pte_t entry;
+	unsigned long pfn;
+	int ret = VM_FAULT_MINOR;
+
+	pte_unmap(page_table);
+	BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+	BUG_ON(is_cow_mapping(vma->vm_flags));
+
+	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
+	if (pfn == NOPFN_OOM)
+		return VM_FAULT_OOM;
+	if (pfn == NOPFN_SIGBUS)
+		return VM_FAULT_SIGBUS;
+
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		entry = pfn_pte(pfn, vma->vm_page_prot);
+		if (write_access)
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		set_pte_at(mm, address, page_table, entry);
+	}
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+}
+
+/*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
  * nonlinear vmas.
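A driver adopting the new ->nopfn interface must set VM_PFNMAP on the vma (note the BUG_ON above) and return either a pfn or one of the NOPFN_* codes. A minimal sketch under stated assumptions: my_nopfn, MY_DEV_NPAGES, and my_base_pfn are hypothetical.

static unsigned long my_nopfn(struct vm_area_struct *vma,
			      unsigned long address)
{
	unsigned long pgoff = (address - vma->vm_start) >> PAGE_SHIFT;

	if (pgoff >= MY_DEV_NPAGES)	/* hypothetical device size check */
		return NOPFN_SIGBUS;

	/* my_base_pfn: assumed first pfn of the device memory region */
	return my_base_pfn + pgoff;
}

static struct vm_operations_struct my_vm_ops = {
	.nopfn	= my_nopfn,
};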
@@ -2272,11 +2365,17 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (!vma->vm_ops || !vma->vm_ops->nopage)
-				return do_anonymous_page(mm, vma, address,
-					pte, pmd, write_access);
-			return do_no_page(mm, vma, address,
-					pte, pmd, write_access);
+			if (vma->vm_ops) {
+				if (vma->vm_ops->nopage)
+					return do_no_page(mm, vma, address,
+							pte, pmd,
+							write_access);
+				if (unlikely(vma->vm_ops->nopfn))
+					return do_no_pfn(mm, vma, address, pte,
+							pmd, write_access);
+			}
+			return do_anonymous_page(mm, vma, address,
+						pte, pmd, write_access);
 		}
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address,
@@ -2505,3 +2604,56 @@ int in_gate_area_no_task(unsigned long addr)
 }
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
+
+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space.
+ * Do not walk the page table directly; use get_user_pages.
+ */
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct page *page;
+	void *old_buf = buf;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	down_read(&mm->mmap_sem);
+	/* ignore errors, just check how much was successfully transferred */
+	while (len) {
+		int bytes, ret, offset;
+		void *maddr;
+
+		ret = get_user_pages(tsk, mm, addr, 1,
+				write, 1, &page, &vma);
+		if (ret <= 0)
+			break;
+
+		bytes = len;
+		offset = addr & (PAGE_SIZE-1);
+		if (bytes > PAGE_SIZE-offset)
+			bytes = PAGE_SIZE-offset;
+
+		maddr = kmap(page);
+		if (write) {
+			copy_to_user_page(vma, page, addr,
+					  maddr + offset, buf, bytes);
+			set_page_dirty_lock(page);
+		} else {
+			copy_from_user_page(vma, page, addr,
+					    buf, maddr + offset, bytes);
+		}
+		kunmap(page);
+		page_cache_release(page);
+		len -= bytes;
+		buf += bytes;
+		addr += bytes;
+	}
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+
+	return buf - old_buf;
+}
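access_process_vm() is the helper ptrace-style debuggers use to read or write another task's memory through get_user_pages(). A hedged usage sketch, with peek_word as a hypothetical caller:

/* read one word from another task's address space, ptrace-style */
static int peek_word(struct task_struct *child, unsigned long addr,
		     unsigned long *val)
{
	int copied = access_process_vm(child, addr, val, sizeof(*val), 0);

	/* a short copy means part of the range was unmapped */
	return (copied == sizeof(*val)) ? 0 : -EIO;
}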
