diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Makefile | 3 | ||||
| -rw-r--r-- | mm/backing-dev.c | 69 | ||||
| -rw-r--r-- | mm/filemap.c | 109 | ||||
| -rw-r--r-- | mm/hugetlb.c | 25 | ||||
| -rw-r--r-- | mm/memory.c | 1 | ||||
| -rw-r--r-- | mm/mempolicy.c | 2 | ||||
| -rw-r--r-- | mm/migrate.c | 3 | ||||
| -rw-r--r-- | mm/mmap.c | 26 | ||||
| -rw-r--r-- | mm/oom_kill.c | 1 | ||||
| -rw-r--r-- | mm/page-writeback.c | 17 | ||||
| -rw-r--r-- | mm/page_alloc.c | 60 | ||||
| -rw-r--r-- | mm/readahead.c | 2 | ||||
| -rw-r--r-- | mm/rmap.c | 41 | ||||
| -rw-r--r-- | mm/shmem.c | 84 | ||||
| -rw-r--r-- | mm/shmem_acl.c | 2 | ||||
| -rw-r--r-- | mm/slab.c | 13 | ||||
| -rw-r--r-- | mm/sparse.c | 2 | ||||
| -rw-r--r-- | mm/truncate.c | 4 | ||||
| -rw-r--r-- | mm/vmalloc.c | 54 | ||||
| -rw-r--r-- | mm/vmscan.c | 75 | ||||
| -rw-r--r-- | mm/vmstat.c | 2 |
21 files changed, 421 insertions, 174 deletions
diff --git a/mm/Makefile b/mm/Makefile index 12b3a4eee88d..f3c077eb0b8e 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -10,7 +10,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ | |||
| 10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ | 10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ |
| 11 | page_alloc.o page-writeback.o pdflush.o \ | 11 | page_alloc.o page-writeback.o pdflush.o \ |
| 12 | readahead.o swap.o truncate.o vmscan.o \ | 12 | readahead.o swap.o truncate.o vmscan.o \ |
| 13 | prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) | 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
| 14 | $(mmu-y) | ||
| 14 | 15 | ||
| 15 | ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) | 16 | ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) |
| 16 | obj-y += bounce.o | 17 | obj-y += bounce.o |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c new file mode 100644 index 000000000000..f50a2811f9dc --- /dev/null +++ b/mm/backing-dev.c | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | |||
| 2 | #include <linux/wait.h> | ||
| 3 | #include <linux/backing-dev.h> | ||
| 4 | #include <linux/fs.h> | ||
| 5 | #include <linux/sched.h> | ||
| 6 | #include <linux/module.h> | ||
| 7 | |||
| 8 | static wait_queue_head_t congestion_wqh[2] = { | ||
| 9 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), | ||
| 10 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) | ||
| 11 | }; | ||
| 12 | |||
| 13 | |||
| 14 | void clear_bdi_congested(struct backing_dev_info *bdi, int rw) | ||
| 15 | { | ||
| 16 | enum bdi_state bit; | ||
| 17 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
| 18 | |||
| 19 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | ||
| 20 | clear_bit(bit, &bdi->state); | ||
| 21 | smp_mb__after_clear_bit(); | ||
| 22 | if (waitqueue_active(wqh)) | ||
| 23 | wake_up(wqh); | ||
| 24 | } | ||
| 25 | EXPORT_SYMBOL(clear_bdi_congested); | ||
| 26 | |||
| 27 | void set_bdi_congested(struct backing_dev_info *bdi, int rw) | ||
| 28 | { | ||
| 29 | enum bdi_state bit; | ||
| 30 | |||
| 31 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | ||
| 32 | set_bit(bit, &bdi->state); | ||
| 33 | } | ||
| 34 | EXPORT_SYMBOL(set_bdi_congested); | ||
| 35 | |||
| 36 | /** | ||
| 37 | * congestion_wait - wait for a backing_dev to become uncongested | ||
| 38 | * @rw: READ or WRITE | ||
| 39 | * @timeout: timeout in jiffies | ||
| 40 | * | ||
| 41 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit | ||
| 42 | * write congestion. If no backing_devs are congested then just wait for the | ||
| 43 | * next write to be completed. | ||
| 44 | */ | ||
| 45 | long congestion_wait(int rw, long timeout) | ||
| 46 | { | ||
| 47 | long ret; | ||
| 48 | DEFINE_WAIT(wait); | ||
| 49 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
| 50 | |||
| 51 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | ||
| 52 | ret = io_schedule_timeout(timeout); | ||
| 53 | finish_wait(wqh, &wait); | ||
| 54 | return ret; | ||
| 55 | } | ||
| 56 | EXPORT_SYMBOL(congestion_wait); | ||
| 57 | |||
| 58 | /** | ||
| 59 | * congestion_end - wake up sleepers on a congested backing_dev_info | ||
| 60 | * @rw: READ or WRITE | ||
| 61 | */ | ||
| 62 | void congestion_end(int rw) | ||
| 63 | { | ||
| 64 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
| 65 | |||
| 66 | if (waitqueue_active(wqh)) | ||
| 67 | wake_up(wqh); | ||
| 68 | } | ||
| 69 | EXPORT_SYMBOL(congestion_end); | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 3464b681f844..7b84dc814347 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -75,8 +75,8 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
| 75 | * ->mmap_sem | 75 | * ->mmap_sem |
| 76 | * ->lock_page (access_process_vm) | 76 | * ->lock_page (access_process_vm) |
| 77 | * | 77 | * |
| 78 | * ->mmap_sem | 78 | * ->i_mutex (generic_file_buffered_write) |
| 79 | * ->i_mutex (msync) | 79 | * ->mmap_sem (fault_in_pages_readable->do_page_fault) |
| 80 | * | 80 | * |
| 81 | * ->i_mutex | 81 | * ->i_mutex |
| 82 | * ->i_alloc_sem (various) | 82 | * ->i_alloc_sem (various) |
| @@ -467,25 +467,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, | |||
| 467 | } | 467 | } |
| 468 | 468 | ||
| 469 | #ifdef CONFIG_NUMA | 469 | #ifdef CONFIG_NUMA |
| 470 | struct page *page_cache_alloc(struct address_space *x) | 470 | struct page *__page_cache_alloc(gfp_t gfp) |
| 471 | { | 471 | { |
| 472 | if (cpuset_do_page_mem_spread()) { | 472 | if (cpuset_do_page_mem_spread()) { |
| 473 | int n = cpuset_mem_spread_node(); | 473 | int n = cpuset_mem_spread_node(); |
| 474 | return alloc_pages_node(n, mapping_gfp_mask(x), 0); | 474 | return alloc_pages_node(n, gfp, 0); |
| 475 | } | 475 | } |
| 476 | return alloc_pages(mapping_gfp_mask(x), 0); | 476 | return alloc_pages(gfp, 0); |
| 477 | } | 477 | } |
| 478 | EXPORT_SYMBOL(page_cache_alloc); | 478 | EXPORT_SYMBOL(__page_cache_alloc); |
| 479 | |||
| 480 | struct page *page_cache_alloc_cold(struct address_space *x) | ||
| 481 | { | ||
| 482 | if (cpuset_do_page_mem_spread()) { | ||
| 483 | int n = cpuset_mem_spread_node(); | ||
| 484 | return alloc_pages_node(n, mapping_gfp_mask(x)|__GFP_COLD, 0); | ||
| 485 | } | ||
| 486 | return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0); | ||
| 487 | } | ||
| 488 | EXPORT_SYMBOL(page_cache_alloc_cold); | ||
| 489 | #endif | 479 | #endif |
| 490 | 480 | ||
| 491 | static int __sleep_on_page_lock(void *word) | 481 | static int __sleep_on_page_lock(void *word) |
| @@ -826,7 +816,6 @@ struct page * | |||
| 826 | grab_cache_page_nowait(struct address_space *mapping, unsigned long index) | 816 | grab_cache_page_nowait(struct address_space *mapping, unsigned long index) |
| 827 | { | 817 | { |
| 828 | struct page *page = find_get_page(mapping, index); | 818 | struct page *page = find_get_page(mapping, index); |
| 829 | gfp_t gfp_mask; | ||
| 830 | 819 | ||
| 831 | if (page) { | 820 | if (page) { |
| 832 | if (!TestSetPageLocked(page)) | 821 | if (!TestSetPageLocked(page)) |
| @@ -834,9 +823,8 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index) | |||
| 834 | page_cache_release(page); | 823 | page_cache_release(page); |
| 835 | return NULL; | 824 | return NULL; |
| 836 | } | 825 | } |
| 837 | gfp_mask = mapping_gfp_mask(mapping) & ~__GFP_FS; | 826 | page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); |
| 838 | page = alloc_pages(gfp_mask, 0); | 827 | if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) { |
| 839 | if (page && add_to_page_cache_lru(page, mapping, index, gfp_mask)) { | ||
| 840 | page_cache_release(page); | 828 | page_cache_release(page); |
| 841 | page = NULL; | 829 | page = NULL; |
| 842 | } | 830 | } |
| @@ -1884,11 +1872,10 @@ repeat: | |||
| 1884 | * if suid or (sgid and xgrp) | 1872 | * if suid or (sgid and xgrp) |
| 1885 | * remove privs | 1873 | * remove privs |
| 1886 | */ | 1874 | */ |
| 1887 | int remove_suid(struct dentry *dentry) | 1875 | int should_remove_suid(struct dentry *dentry) |
| 1888 | { | 1876 | { |
| 1889 | mode_t mode = dentry->d_inode->i_mode; | 1877 | mode_t mode = dentry->d_inode->i_mode; |
| 1890 | int kill = 0; | 1878 | int kill = 0; |
| 1891 | int result = 0; | ||
| 1892 | 1879 | ||
| 1893 | /* suid always must be killed */ | 1880 | /* suid always must be killed */ |
| 1894 | if (unlikely(mode & S_ISUID)) | 1881 | if (unlikely(mode & S_ISUID)) |
| @@ -1901,13 +1888,28 @@ int remove_suid(struct dentry *dentry) | |||
| 1901 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) | 1888 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) |
| 1902 | kill |= ATTR_KILL_SGID; | 1889 | kill |= ATTR_KILL_SGID; |
| 1903 | 1890 | ||
| 1904 | if (unlikely(kill && !capable(CAP_FSETID))) { | 1891 | if (unlikely(kill && !capable(CAP_FSETID))) |
| 1905 | struct iattr newattrs; | 1892 | return kill; |
| 1906 | 1893 | ||
| 1907 | newattrs.ia_valid = ATTR_FORCE | kill; | 1894 | return 0; |
| 1908 | result = notify_change(dentry, &newattrs); | 1895 | } |
| 1909 | } | 1896 | |
| 1910 | return result; | 1897 | int __remove_suid(struct dentry *dentry, int kill) |
| 1898 | { | ||
| 1899 | struct iattr newattrs; | ||
| 1900 | |||
| 1901 | newattrs.ia_valid = ATTR_FORCE | kill; | ||
| 1902 | return notify_change(dentry, &newattrs); | ||
| 1903 | } | ||
| 1904 | |||
| 1905 | int remove_suid(struct dentry *dentry) | ||
| 1906 | { | ||
| 1907 | int kill = should_remove_suid(dentry); | ||
| 1908 | |||
| 1909 | if (unlikely(kill)) | ||
| 1910 | return __remove_suid(dentry, kill); | ||
| 1911 | |||
| 1912 | return 0; | ||
| 1911 | } | 1913 | } |
| 1912 | EXPORT_SYMBOL(remove_suid); | 1914 | EXPORT_SYMBOL(remove_suid); |
| 1913 | 1915 | ||
| @@ -2222,7 +2224,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
| 2222 | unsigned long nr_segs, loff_t *ppos) | 2224 | unsigned long nr_segs, loff_t *ppos) |
| 2223 | { | 2225 | { |
| 2224 | struct file *file = iocb->ki_filp; | 2226 | struct file *file = iocb->ki_filp; |
| 2225 | const struct address_space * mapping = file->f_mapping; | 2227 | struct address_space * mapping = file->f_mapping; |
| 2226 | size_t ocount; /* original count */ | 2228 | size_t ocount; /* original count */ |
| 2227 | size_t count; /* after file limit checks */ | 2229 | size_t count; /* after file limit checks */ |
| 2228 | struct inode *inode = mapping->host; | 2230 | struct inode *inode = mapping->host; |
| @@ -2275,8 +2277,11 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
| 2275 | 2277 | ||
| 2276 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 2278 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
| 2277 | if (unlikely(file->f_flags & O_DIRECT)) { | 2279 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 2278 | written = generic_file_direct_write(iocb, iov, | 2280 | loff_t endbyte; |
| 2279 | &nr_segs, pos, ppos, count, ocount); | 2281 | ssize_t written_buffered; |
| 2282 | |||
| 2283 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, | ||
| 2284 | ppos, count, ocount); | ||
| 2280 | if (written < 0 || written == count) | 2285 | if (written < 0 || written == count) |
| 2281 | goto out; | 2286 | goto out; |
| 2282 | /* | 2287 | /* |
| @@ -2285,10 +2290,46 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, | |||
| 2285 | */ | 2290 | */ |
| 2286 | pos += written; | 2291 | pos += written; |
| 2287 | count -= written; | 2292 | count -= written; |
| 2288 | } | 2293 | written_buffered = generic_file_buffered_write(iocb, iov, |
| 2294 | nr_segs, pos, ppos, count, | ||
| 2295 | written); | ||
| 2296 | /* | ||
| 2297 | * If generic_file_buffered_write() returned a synchronous error | ||
| 2298 | * then we want to return the number of bytes which were | ||
| 2299 | * direct-written, or the error code if that was zero. Note | ||
| 2300 | * that this differs from normal direct-io semantics, which | ||
| 2301 | * will return -EFOO even if some bytes were written. | ||
| 2302 | */ | ||
| 2303 | if (written_buffered < 0) { | ||
| 2304 | err = written_buffered; | ||
| 2305 | goto out; | ||
| 2306 | } | ||
| 2289 | 2307 | ||
| 2290 | written = generic_file_buffered_write(iocb, iov, nr_segs, | 2308 | /* |
| 2291 | pos, ppos, count, written); | 2309 | * We need to ensure that the page cache pages are written to |
| 2310 | * disk and invalidated to preserve the expected O_DIRECT | ||
| 2311 | * semantics. | ||
| 2312 | */ | ||
| 2313 | endbyte = pos + written_buffered - written - 1; | ||
| 2314 | err = do_sync_file_range(file, pos, endbyte, | ||
| 2315 | SYNC_FILE_RANGE_WAIT_BEFORE| | ||
| 2316 | SYNC_FILE_RANGE_WRITE| | ||
| 2317 | SYNC_FILE_RANGE_WAIT_AFTER); | ||
| 2318 | if (err == 0) { | ||
| 2319 | written = written_buffered; | ||
| 2320 | invalidate_mapping_pages(mapping, | ||
| 2321 | pos >> PAGE_CACHE_SHIFT, | ||
| 2322 | endbyte >> PAGE_CACHE_SHIFT); | ||
| 2323 | } else { | ||
| 2324 | /* | ||
| 2325 | * We don't know how much we wrote, so just return | ||
| 2326 | * the number of bytes which were direct-written | ||
| 2327 | */ | ||
| 2328 | } | ||
| 2329 | } else { | ||
| 2330 | written = generic_file_buffered_write(iocb, iov, nr_segs, | ||
| 2331 | pos, ppos, count, written); | ||
| 2332 | } | ||
| 2292 | out: | 2333 | out: |
| 2293 | current->backing_dev_info = NULL; | 2334 | current->backing_dev_info = NULL; |
| 2294 | return written ? written : err; | 2335 | return written ? written : err; |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1d709ff528e1..a088f593a807 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -356,8 +356,8 @@ nomem: | |||
| 356 | return -ENOMEM; | 356 | return -ENOMEM; |
| 357 | } | 357 | } |
| 358 | 358 | ||
| 359 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | 359 | void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, |
| 360 | unsigned long end) | 360 | unsigned long end) |
| 361 | { | 361 | { |
| 362 | struct mm_struct *mm = vma->vm_mm; | 362 | struct mm_struct *mm = vma->vm_mm; |
| 363 | unsigned long address; | 363 | unsigned long address; |
| @@ -398,6 +398,24 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
| 398 | } | 398 | } |
| 399 | } | 399 | } |
| 400 | 400 | ||
| 401 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | ||
| 402 | unsigned long end) | ||
| 403 | { | ||
| 404 | /* | ||
| 405 | * It is undesirable to test vma->vm_file as it should be non-null | ||
| 406 | * for valid hugetlb area. However, vm_file will be NULL in the error | ||
| 407 | * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, | ||
| 408 | * do_mmap_pgoff() nullifies vma->vm_file before calling this function | ||
| 409 | * to clean up. Since no pte has actually been setup, it is safe to | ||
| 410 | * do nothing in this case. | ||
| 411 | */ | ||
| 412 | if (vma->vm_file) { | ||
| 413 | spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); | ||
| 414 | __unmap_hugepage_range(vma, start, end); | ||
| 415 | spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 401 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | 419 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, |
| 402 | unsigned long address, pte_t *ptep, pte_t pte) | 420 | unsigned long address, pte_t *ptep, pte_t pte) |
| 403 | { | 421 | { |
| @@ -460,6 +478,9 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 460 | retry: | 478 | retry: |
| 461 | page = find_lock_page(mapping, idx); | 479 | page = find_lock_page(mapping, idx); |
| 462 | if (!page) { | 480 | if (!page) { |
| 481 | size = i_size_read(mapping->host) >> HPAGE_SHIFT; | ||
| 482 | if (idx >= size) | ||
| 483 | goto out; | ||
| 463 | if (hugetlb_get_quota(mapping)) | 484 | if (hugetlb_get_quota(mapping)) |
| 464 | goto out; | 485 | goto out; |
| 465 | page = alloc_huge_page(vma, address); | 486 | page = alloc_huge_page(vma, address); |
diff --git a/mm/memory.c b/mm/memory.c index b5a4aadd961a..156861fcac43 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -1452,6 +1452,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo | |||
| 1452 | if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) | 1452 | if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) |
| 1453 | memset(kaddr, 0, PAGE_SIZE); | 1453 | memset(kaddr, 0, PAGE_SIZE); |
| 1454 | kunmap_atomic(kaddr, KM_USER0); | 1454 | kunmap_atomic(kaddr, KM_USER0); |
| 1455 | flush_dcache_page(dst); | ||
| 1455 | return; | 1456 | return; |
| 1456 | 1457 | ||
| 1457 | } | 1458 | } |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 25788b1b7fcf..617fb31086ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
| @@ -727,7 +727,7 @@ int do_migrate_pages(struct mm_struct *mm, | |||
| 727 | return -ENOSYS; | 727 | return -ENOSYS; |
| 728 | } | 728 | } |
| 729 | 729 | ||
| 730 | static struct page *new_vma_page(struct page *page, unsigned long private) | 730 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) |
| 731 | { | 731 | { |
| 732 | return NULL; | 732 | return NULL; |
| 733 | } | 733 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index ba2453f9483d..b4979d423d2b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -952,7 +952,8 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | |||
| 952 | goto out; | 952 | goto out; |
| 953 | 953 | ||
| 954 | pm[i].node = node; | 954 | pm[i].node = node; |
| 955 | } | 955 | } else |
| 956 | pm[i].node = 0; /* anything to not match MAX_NUMNODES */ | ||
| 956 | } | 957 | } |
| 957 | /* End marker */ | 958 | /* End marker */ |
| 958 | pm[nr_pages].node = MAX_NUMNODES; | 959 | pm[nr_pages].node = MAX_NUMNODES; |
| @@ -900,17 +900,6 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, | |||
| 900 | int accountable = 1; | 900 | int accountable = 1; |
| 901 | unsigned long charged = 0, reqprot = prot; | 901 | unsigned long charged = 0, reqprot = prot; |
| 902 | 902 | ||
| 903 | if (file) { | ||
| 904 | if (is_file_hugepages(file)) | ||
| 905 | accountable = 0; | ||
| 906 | |||
| 907 | if (!file->f_op || !file->f_op->mmap) | ||
| 908 | return -ENODEV; | ||
| 909 | |||
| 910 | if ((prot & PROT_EXEC) && | ||
| 911 | (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)) | ||
| 912 | return -EPERM; | ||
| 913 | } | ||
| 914 | /* | 903 | /* |
| 915 | * Does the application expect PROT_READ to imply PROT_EXEC? | 904 | * Does the application expect PROT_READ to imply PROT_EXEC? |
| 916 | * | 905 | * |
| @@ -1000,6 +989,16 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, | |||
| 1000 | case MAP_PRIVATE: | 989 | case MAP_PRIVATE: |
| 1001 | if (!(file->f_mode & FMODE_READ)) | 990 | if (!(file->f_mode & FMODE_READ)) |
| 1002 | return -EACCES; | 991 | return -EACCES; |
| 992 | if (file->f_vfsmnt->mnt_flags & MNT_NOEXEC) { | ||
| 993 | if (vm_flags & VM_EXEC) | ||
| 994 | return -EPERM; | ||
| 995 | vm_flags &= ~VM_MAYEXEC; | ||
| 996 | } | ||
| 997 | if (is_file_hugepages(file)) | ||
| 998 | accountable = 0; | ||
| 999 | |||
| 1000 | if (!file->f_op || !file->f_op->mmap) | ||
| 1001 | return -ENODEV; | ||
| 1003 | break; | 1002 | break; |
| 1004 | 1003 | ||
| 1005 | default: | 1004 | default: |
| @@ -1380,7 +1379,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, | |||
| 1380 | * Check if the given range is hugepage aligned, and | 1379 | * Check if the given range is hugepage aligned, and |
| 1381 | * can be made suitable for hugepages. | 1380 | * can be made suitable for hugepages. |
| 1382 | */ | 1381 | */ |
| 1383 | ret = prepare_hugepage_range(addr, len); | 1382 | ret = prepare_hugepage_range(addr, len, pgoff); |
| 1384 | } else { | 1383 | } else { |
| 1385 | /* | 1384 | /* |
| 1386 | * Ensure that a normal request is not falling in a | 1385 | * Ensure that a normal request is not falling in a |
| @@ -1881,6 +1880,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len) | |||
| 1881 | if ((addr + len) > TASK_SIZE || (addr + len) < addr) | 1880 | if ((addr + len) > TASK_SIZE || (addr + len) < addr) |
| 1882 | return -EINVAL; | 1881 | return -EINVAL; |
| 1883 | 1882 | ||
| 1883 | if (is_hugepage_only_range(mm, addr, len)) | ||
| 1884 | return -EINVAL; | ||
| 1885 | |||
| 1884 | flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; | 1886 | flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; |
| 1885 | 1887 | ||
| 1886 | error = arch_mmap_check(addr, len, flags); | 1888 | error = arch_mmap_check(addr, len, flags); |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 20f41b082e16..2e3ce3a928b9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | * kernel subsystems and hints as to where to find out what things do. | 15 | * kernel subsystems and hints as to where to find out what things do. |
| 16 | */ | 16 | */ |
| 17 | 17 | ||
| 18 | #include <linux/oom.h> | ||
| 18 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
| 19 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 20 | #include <linux/swap.h> | 21 | #include <linux/swap.h> |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a0f339057449..8d9b19f239c3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -222,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
| 222 | if (pages_written >= write_chunk) | 222 | if (pages_written >= write_chunk) |
| 223 | break; /* We've done our duty */ | 223 | break; /* We've done our duty */ |
| 224 | } | 224 | } |
| 225 | blk_congestion_wait(WRITE, HZ/10); | 225 | congestion_wait(WRITE, HZ/10); |
| 226 | } | 226 | } |
| 227 | 227 | ||
| 228 | if (nr_reclaimable + global_page_state(NR_WRITEBACK) | 228 | if (nr_reclaimable + global_page_state(NR_WRITEBACK) |
| @@ -314,7 +314,7 @@ void throttle_vm_writeout(void) | |||
| 314 | if (global_page_state(NR_UNSTABLE_NFS) + | 314 | if (global_page_state(NR_UNSTABLE_NFS) + |
| 315 | global_page_state(NR_WRITEBACK) <= dirty_thresh) | 315 | global_page_state(NR_WRITEBACK) <= dirty_thresh) |
| 316 | break; | 316 | break; |
| 317 | blk_congestion_wait(WRITE, HZ/10); | 317 | congestion_wait(WRITE, HZ/10); |
| 318 | } | 318 | } |
| 319 | } | 319 | } |
| 320 | 320 | ||
| @@ -351,7 +351,7 @@ static void background_writeout(unsigned long _min_pages) | |||
| 351 | min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 351 | min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
| 352 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | 352 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { |
| 353 | /* Wrote less than expected */ | 353 | /* Wrote less than expected */ |
| 354 | blk_congestion_wait(WRITE, HZ/10); | 354 | congestion_wait(WRITE, HZ/10); |
| 355 | if (!wbc.encountered_congestion) | 355 | if (!wbc.encountered_congestion) |
| 356 | break; | 356 | break; |
| 357 | } | 357 | } |
| @@ -422,7 +422,7 @@ static void wb_kupdate(unsigned long arg) | |||
| 422 | writeback_inodes(&wbc); | 422 | writeback_inodes(&wbc); |
| 423 | if (wbc.nr_to_write > 0) { | 423 | if (wbc.nr_to_write > 0) { |
| 424 | if (wbc.encountered_congestion) | 424 | if (wbc.encountered_congestion) |
| 425 | blk_congestion_wait(WRITE, HZ/10); | 425 | congestion_wait(WRITE, HZ/10); |
| 426 | else | 426 | else |
| 427 | break; /* All the old data is written */ | 427 | break; /* All the old data is written */ |
| 428 | } | 428 | } |
| @@ -956,15 +956,6 @@ int test_set_page_writeback(struct page *page) | |||
| 956 | EXPORT_SYMBOL(test_set_page_writeback); | 956 | EXPORT_SYMBOL(test_set_page_writeback); |
| 957 | 957 | ||
| 958 | /* | 958 | /* |
| 959 | * Wakes up tasks that are being throttled due to writeback congestion | ||
| 960 | */ | ||
| 961 | void writeback_congestion_end(void) | ||
| 962 | { | ||
| 963 | blk_congestion_end(WRITE); | ||
| 964 | } | ||
| 965 | EXPORT_SYMBOL(writeback_congestion_end); | ||
| 966 | |||
| 967 | /* | ||
| 968 | * Return true if any of the pages in the mapping are marked with the | 959 | * Return true if any of the pages in the mapping are marked with the |
| 969 | * passed tag. | 960 | * passed tag. |
| 970 | */ | 961 | */ |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a8c003e7b3d5..bf2f6cff1d6a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/stop_machine.h> | 39 | #include <linux/stop_machine.h> |
| 40 | #include <linux/sort.h> | 40 | #include <linux/sort.h> |
| 41 | #include <linux/pfn.h> | 41 | #include <linux/pfn.h> |
| 42 | #include <linux/backing-dev.h> | ||
| 42 | 43 | ||
| 43 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
| 44 | #include <asm/div64.h> | 45 | #include <asm/div64.h> |
| @@ -495,17 +496,16 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
| 495 | int i; | 496 | int i; |
| 496 | int reserved = 0; | 497 | int reserved = 0; |
| 497 | 498 | ||
| 498 | arch_free_page(page, order); | ||
| 499 | if (!PageHighMem(page)) | ||
| 500 | debug_check_no_locks_freed(page_address(page), | ||
| 501 | PAGE_SIZE<<order); | ||
| 502 | |||
| 503 | for (i = 0 ; i < (1 << order) ; ++i) | 499 | for (i = 0 ; i < (1 << order) ; ++i) |
| 504 | reserved += free_pages_check(page + i); | 500 | reserved += free_pages_check(page + i); |
| 505 | if (reserved) | 501 | if (reserved) |
| 506 | return; | 502 | return; |
| 507 | 503 | ||
| 504 | if (!PageHighMem(page)) | ||
| 505 | debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order); | ||
| 506 | arch_free_page(page, order); | ||
| 508 | kernel_map_pages(page, 1 << order, 0); | 507 | kernel_map_pages(page, 1 << order, 0); |
| 508 | |||
| 509 | local_irq_save(flags); | 509 | local_irq_save(flags); |
| 510 | __count_vm_events(PGFREE, 1 << order); | 510 | __count_vm_events(PGFREE, 1 << order); |
| 511 | free_one_page(page_zone(page), page, order); | 511 | free_one_page(page_zone(page), page, order); |
| @@ -781,13 +781,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) | |||
| 781 | struct per_cpu_pages *pcp; | 781 | struct per_cpu_pages *pcp; |
| 782 | unsigned long flags; | 782 | unsigned long flags; |
| 783 | 783 | ||
| 784 | arch_free_page(page, 0); | ||
| 785 | |||
| 786 | if (PageAnon(page)) | 784 | if (PageAnon(page)) |
| 787 | page->mapping = NULL; | 785 | page->mapping = NULL; |
| 788 | if (free_pages_check(page)) | 786 | if (free_pages_check(page)) |
| 789 | return; | 787 | return; |
| 790 | 788 | ||
| 789 | if (!PageHighMem(page)) | ||
| 790 | debug_check_no_locks_freed(page_address(page), PAGE_SIZE); | ||
| 791 | arch_free_page(page, 0); | ||
| 791 | kernel_map_pages(page, 1, 0); | 792 | kernel_map_pages(page, 1, 0); |
| 792 | 793 | ||
| 793 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; | 794 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; |
| @@ -852,7 +853,7 @@ again: | |||
| 852 | pcp = &zone_pcp(zone, cpu)->pcp[cold]; | 853 | pcp = &zone_pcp(zone, cpu)->pcp[cold]; |
| 853 | local_irq_save(flags); | 854 | local_irq_save(flags); |
| 854 | if (!pcp->count) { | 855 | if (!pcp->count) { |
| 855 | pcp->count += rmqueue_bulk(zone, 0, | 856 | pcp->count = rmqueue_bulk(zone, 0, |
| 856 | pcp->batch, &pcp->list); | 857 | pcp->batch, &pcp->list); |
| 857 | if (unlikely(!pcp->count)) | 858 | if (unlikely(!pcp->count)) |
| 858 | goto failed; | 859 | goto failed; |
| @@ -1050,7 +1051,7 @@ nofail_alloc: | |||
| 1050 | if (page) | 1051 | if (page) |
| 1051 | goto got_pg; | 1052 | goto got_pg; |
| 1052 | if (gfp_mask & __GFP_NOFAIL) { | 1053 | if (gfp_mask & __GFP_NOFAIL) { |
| 1053 | blk_congestion_wait(WRITE, HZ/50); | 1054 | congestion_wait(WRITE, HZ/50); |
| 1054 | goto nofail_alloc; | 1055 | goto nofail_alloc; |
| 1055 | } | 1056 | } |
| 1056 | } | 1057 | } |
| @@ -1113,7 +1114,7 @@ rebalance: | |||
| 1113 | do_retry = 1; | 1114 | do_retry = 1; |
| 1114 | } | 1115 | } |
| 1115 | if (do_retry) { | 1116 | if (do_retry) { |
| 1116 | blk_congestion_wait(WRITE, HZ/50); | 1117 | congestion_wait(WRITE, HZ/50); |
| 1117 | goto rebalance; | 1118 | goto rebalance; |
| 1118 | } | 1119 | } |
| 1119 | 1120 | ||
| @@ -1688,6 +1689,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
| 1688 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | 1689 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { |
| 1689 | if (!early_pfn_valid(pfn)) | 1690 | if (!early_pfn_valid(pfn)) |
| 1690 | continue; | 1691 | continue; |
| 1692 | if (!early_pfn_in_nid(pfn, nid)) | ||
| 1693 | continue; | ||
| 1691 | page = pfn_to_page(pfn); | 1694 | page = pfn_to_page(pfn); |
| 1692 | set_page_links(page, zone, nid, pfn); | 1695 | set_page_links(page, zone, nid, pfn); |
| 1693 | init_page_count(page); | 1696 | init_page_count(page); |
| @@ -2258,7 +2261,7 @@ unsigned long __init __absent_pages_in_range(int nid, | |||
| 2258 | 2261 | ||
| 2259 | /* Account for ranges past physical memory on this node */ | 2262 | /* Account for ranges past physical memory on this node */ |
| 2260 | if (range_end_pfn > prev_end_pfn) | 2263 | if (range_end_pfn > prev_end_pfn) |
| 2261 | hole_pages = range_end_pfn - | 2264 | hole_pages += range_end_pfn - |
| 2262 | max(range_start_pfn, prev_end_pfn); | 2265 | max(range_start_pfn, prev_end_pfn); |
| 2263 | 2266 | ||
| 2264 | return hole_pages; | 2267 | return hole_pages; |
| @@ -2294,19 +2297,6 @@ unsigned long __init zone_absent_pages_in_node(int nid, | |||
| 2294 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); | 2297 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); |
| 2295 | } | 2298 | } |
| 2296 | 2299 | ||
| 2297 | /* Return the zone index a PFN is in */ | ||
| 2298 | int memmap_zone_idx(struct page *lmem_map) | ||
| 2299 | { | ||
| 2300 | int i; | ||
| 2301 | unsigned long phys_addr = virt_to_phys(lmem_map); | ||
| 2302 | unsigned long pfn = phys_addr >> PAGE_SHIFT; | ||
| 2303 | |||
| 2304 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
| 2305 | if (pfn < arch_zone_highest_possible_pfn[i]) | ||
| 2306 | break; | ||
| 2307 | |||
| 2308 | return i; | ||
| 2309 | } | ||
| 2310 | #else | 2300 | #else |
| 2311 | static inline unsigned long zone_spanned_pages_in_node(int nid, | 2301 | static inline unsigned long zone_spanned_pages_in_node(int nid, |
| 2312 | unsigned long zone_type, | 2302 | unsigned long zone_type, |
| @@ -2325,10 +2315,6 @@ static inline unsigned long zone_absent_pages_in_node(int nid, | |||
| 2325 | return zholes_size[zone_type]; | 2315 | return zholes_size[zone_type]; |
| 2326 | } | 2316 | } |
| 2327 | 2317 | ||
| 2328 | static inline int memmap_zone_idx(struct page *lmem_map) | ||
| 2329 | { | ||
| 2330 | return MAX_NR_ZONES; | ||
| 2331 | } | ||
| 2332 | #endif | 2318 | #endif |
| 2333 | 2319 | ||
| 2334 | static void __init calculate_node_totalpages(struct pglist_data *pgdat, | 2320 | static void __init calculate_node_totalpages(struct pglist_data *pgdat, |
| @@ -2421,7 +2407,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, | |||
| 2421 | zone->zone_pgdat = pgdat; | 2407 | zone->zone_pgdat = pgdat; |
| 2422 | zone->free_pages = 0; | 2408 | zone->free_pages = 0; |
| 2423 | 2409 | ||
| 2424 | zone->temp_priority = zone->prev_priority = DEF_PRIORITY; | 2410 | zone->prev_priority = DEF_PRIORITY; |
| 2425 | 2411 | ||
| 2426 | zone_pcp_init(zone); | 2412 | zone_pcp_init(zone); |
| 2427 | INIT_LIST_HEAD(&zone->active_list); | 2413 | INIT_LIST_HEAD(&zone->active_list); |
| @@ -3136,3 +3122,19 @@ unsigned long page_to_pfn(struct page *page) | |||
| 3136 | EXPORT_SYMBOL(pfn_to_page); | 3122 | EXPORT_SYMBOL(pfn_to_page); |
| 3137 | EXPORT_SYMBOL(page_to_pfn); | 3123 | EXPORT_SYMBOL(page_to_pfn); |
| 3138 | #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ | 3124 | #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ |
| 3125 | |||
| 3126 | #if MAX_NUMNODES > 1 | ||
| 3127 | /* | ||
| 3128 | * Find the highest possible node id. | ||
| 3129 | */ | ||
| 3130 | int highest_possible_node_id(void) | ||
| 3131 | { | ||
| 3132 | unsigned int node; | ||
| 3133 | unsigned int highest = 0; | ||
| 3134 | |||
| 3135 | for_each_node_mask(node, node_possible_map) | ||
| 3136 | highest = node; | ||
| 3137 | return highest; | ||
| 3138 | } | ||
| 3139 | EXPORT_SYMBOL(highest_possible_node_id); | ||
| 3140 | #endif | ||
diff --git a/mm/readahead.c b/mm/readahead.c index 1ba736ac0367..23cb61a01c6e 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
| @@ -173,6 +173,8 @@ static int read_pages(struct address_space *mapping, struct file *filp, | |||
| 173 | 173 | ||
| 174 | if (mapping->a_ops->readpages) { | 174 | if (mapping->a_ops->readpages) { |
| 175 | ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); | 175 | ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); |
| 176 | /* Clean up the remaining pages */ | ||
| 177 | put_pages_list(pages); | ||
| 176 | goto out; | 178 | goto out; |
| 177 | } | 179 | } |
| 178 | 180 | ||
| @@ -21,27 +21,21 @@ | |||
| 21 | * Lock ordering in mm: | 21 | * Lock ordering in mm: |
| 22 | * | 22 | * |
| 23 | * inode->i_mutex (while writing or truncating, not reading or faulting) | 23 | * inode->i_mutex (while writing or truncating, not reading or faulting) |
| 24 | * inode->i_alloc_sem | 24 | * inode->i_alloc_sem (vmtruncate_range) |
| 25 | * | 25 | * mm->mmap_sem |
| 26 | * When a page fault occurs in writing from user to file, down_read | 26 | * page->flags PG_locked (lock_page) |
| 27 | * of mmap_sem nests within i_mutex; in sys_msync, i_mutex nests within | 27 | * mapping->i_mmap_lock |
| 28 | * down_read of mmap_sem; i_mutex and down_write of mmap_sem are never | 28 | * anon_vma->lock |
| 29 | * taken together; in truncation, i_mutex is taken outermost. | 29 | * mm->page_table_lock or pte_lock |
| 30 | * | 30 | * zone->lru_lock (in mark_page_accessed, isolate_lru_page) |
| 31 | * mm->mmap_sem | 31 | * swap_lock (in swap_duplicate, swap_info_get) |
| 32 | * page->flags PG_locked (lock_page) | 32 | * mmlist_lock (in mmput, drain_mmlist and others) |
| 33 | * mapping->i_mmap_lock | 33 | * mapping->private_lock (in __set_page_dirty_buffers) |
| 34 | * anon_vma->lock | 34 | * inode_lock (in set_page_dirty's __mark_inode_dirty) |
| 35 | * mm->page_table_lock or pte_lock | 35 | * sb_lock (within inode_lock in fs/fs-writeback.c) |
| 36 | * zone->lru_lock (in mark_page_accessed, isolate_lru_page) | 36 | * mapping->tree_lock (widely used, in set_page_dirty, |
| 37 | * swap_lock (in swap_duplicate, swap_info_get) | 37 | * in arch-dependent flush_dcache_mmap_lock, |
| 38 | * mmlist_lock (in mmput, drain_mmlist and others) | 38 | * within inode_lock in __sync_single_inode) |
| 39 | * mapping->private_lock (in __set_page_dirty_buffers) | ||
| 40 | * inode_lock (in set_page_dirty's __mark_inode_dirty) | ||
| 41 | * sb_lock (within inode_lock in fs/fs-writeback.c) | ||
| 42 | * mapping->tree_lock (widely used, in set_page_dirty, | ||
| 43 | * in arch-dependent flush_dcache_mmap_lock, | ||
| 44 | * within inode_lock in __sync_single_inode) | ||
| 45 | */ | 39 | */ |
| 46 | 40 | ||
| 47 | #include <linux/mm.h> | 41 | #include <linux/mm.h> |
| @@ -576,15 +570,14 @@ void page_add_file_rmap(struct page *page) | |||
| 576 | void page_remove_rmap(struct page *page) | 570 | void page_remove_rmap(struct page *page) |
| 577 | { | 571 | { |
| 578 | if (atomic_add_negative(-1, &page->_mapcount)) { | 572 | if (atomic_add_negative(-1, &page->_mapcount)) { |
| 579 | #ifdef CONFIG_DEBUG_VM | ||
| 580 | if (unlikely(page_mapcount(page) < 0)) { | 573 | if (unlikely(page_mapcount(page) < 0)) { |
| 581 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); | 574 | printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); |
| 582 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); | 575 | printk (KERN_EMERG " page->flags = %lx\n", page->flags); |
| 583 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); | 576 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); |
| 584 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); | 577 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); |
| 578 | BUG(); | ||
| 585 | } | 579 | } |
| 586 | #endif | 580 | |
| 587 | BUG_ON(page_mapcount(page) < 0); | ||
| 588 | /* | 581 | /* |
| 589 | * It would be tidy to reset the PageAnon mapping here, | 582 | * It would be tidy to reset the PageAnon mapping here, |
| 590 | * but that might overwrite a racing page_add_anon_rmap | 583 | * but that might overwrite a racing page_add_anon_rmap |
diff --git a/mm/shmem.c b/mm/shmem.c index bb8ca7ef7094..4959535fc14c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -48,6 +48,7 @@ | |||
| 48 | #include <linux/ctype.h> | 48 | #include <linux/ctype.h> |
| 49 | #include <linux/migrate.h> | 49 | #include <linux/migrate.h> |
| 50 | #include <linux/highmem.h> | 50 | #include <linux/highmem.h> |
| 51 | #include <linux/backing-dev.h> | ||
| 51 | 52 | ||
| 52 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
| 53 | #include <asm/div64.h> | 54 | #include <asm/div64.h> |
| @@ -1131,7 +1132,7 @@ repeat: | |||
| 1131 | page_cache_release(swappage); | 1132 | page_cache_release(swappage); |
| 1132 | if (error == -ENOMEM) { | 1133 | if (error == -ENOMEM) { |
| 1133 | /* let kswapd refresh zone for GFP_ATOMICs */ | 1134 | /* let kswapd refresh zone for GFP_ATOMICs */ |
| 1134 | blk_congestion_wait(WRITE, HZ/50); | 1135 | congestion_wait(WRITE, HZ/50); |
| 1135 | } | 1136 | } |
| 1136 | goto repeat; | 1137 | goto repeat; |
| 1137 | } | 1138 | } |
| @@ -1362,6 +1363,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
| 1362 | inode->i_mapping->a_ops = &shmem_aops; | 1363 | inode->i_mapping->a_ops = &shmem_aops; |
| 1363 | inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; | 1364 | inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; |
| 1364 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1365 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 1366 | inode->i_generation = get_seconds(); | ||
| 1365 | info = SHMEM_I(inode); | 1367 | info = SHMEM_I(inode); |
| 1366 | memset(info, 0, (char *)inode - (char *)info); | 1368 | memset(info, 0, (char *)inode - (char *)info); |
| 1367 | spin_lock_init(&info->lock); | 1369 | spin_lock_init(&info->lock); |
| @@ -1956,6 +1958,85 @@ static struct xattr_handler *shmem_xattr_handlers[] = { | |||
| 1956 | }; | 1958 | }; |
| 1957 | #endif | 1959 | #endif |
| 1958 | 1960 | ||
| 1961 | static struct dentry *shmem_get_parent(struct dentry *child) | ||
| 1962 | { | ||
| 1963 | return ERR_PTR(-ESTALE); | ||
| 1964 | } | ||
| 1965 | |||
| 1966 | static int shmem_match(struct inode *ino, void *vfh) | ||
| 1967 | { | ||
| 1968 | __u32 *fh = vfh; | ||
| 1969 | __u64 inum = fh[2]; | ||
| 1970 | inum = (inum << 32) | fh[1]; | ||
| 1971 | return ino->i_ino == inum && fh[0] == ino->i_generation; | ||
| 1972 | } | ||
| 1973 | |||
| 1974 | static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) | ||
| 1975 | { | ||
| 1976 | struct dentry *de = NULL; | ||
| 1977 | struct inode *inode; | ||
| 1978 | __u32 *fh = vfh; | ||
| 1979 | __u64 inum = fh[2]; | ||
| 1980 | inum = (inum << 32) | fh[1]; | ||
| 1981 | |||
| 1982 | inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); | ||
| 1983 | if (inode) { | ||
| 1984 | de = d_find_alias(inode); | ||
| 1985 | iput(inode); | ||
| 1986 | } | ||
| 1987 | |||
| 1988 | return de? de: ERR_PTR(-ESTALE); | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, | ||
| 1992 | int len, int type, | ||
| 1993 | int (*acceptable)(void *context, struct dentry *de), | ||
| 1994 | void *context) | ||
| 1995 | { | ||
| 1996 | if (len < 3) | ||
| 1997 | return ERR_PTR(-ESTALE); | ||
| 1998 | |||
| 1999 | return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, | ||
| 2000 | context); | ||
| 2001 | } | ||
| 2002 | |||
| 2003 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | ||
| 2004 | int connectable) | ||
| 2005 | { | ||
| 2006 | struct inode *inode = dentry->d_inode; | ||
| 2007 | |||
| 2008 | if (*len < 3) | ||
| 2009 | return 255; | ||
| 2010 | |||
| 2011 | if (hlist_unhashed(&inode->i_hash)) { | ||
| 2012 | /* Unfortunately insert_inode_hash is not idempotent, | ||
| 2013 | * so as we hash inodes here rather than at creation | ||
| 2014 | * time, we need a lock to ensure we only try | ||
| 2015 | * to do it once | ||
| 2016 | */ | ||
| 2017 | static DEFINE_SPINLOCK(lock); | ||
| 2018 | spin_lock(&lock); | ||
| 2019 | if (hlist_unhashed(&inode->i_hash)) | ||
| 2020 | __insert_inode_hash(inode, | ||
| 2021 | inode->i_ino + inode->i_generation); | ||
| 2022 | spin_unlock(&lock); | ||
| 2023 | } | ||
| 2024 | |||
| 2025 | fh[0] = inode->i_generation; | ||
| 2026 | fh[1] = inode->i_ino; | ||
| 2027 | fh[2] = ((__u64)inode->i_ino) >> 32; | ||
| 2028 | |||
| 2029 | *len = 3; | ||
| 2030 | return 1; | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | static struct export_operations shmem_export_ops = { | ||
| 2034 | .get_parent = shmem_get_parent, | ||
| 2035 | .get_dentry = shmem_get_dentry, | ||
| 2036 | .encode_fh = shmem_encode_fh, | ||
| 2037 | .decode_fh = shmem_decode_fh, | ||
| 2038 | }; | ||
| 2039 | |||
| 1959 | static int shmem_parse_options(char *options, int *mode, uid_t *uid, | 2040 | static int shmem_parse_options(char *options, int *mode, uid_t *uid, |
| 1960 | gid_t *gid, unsigned long *blocks, unsigned long *inodes, | 2041 | gid_t *gid, unsigned long *blocks, unsigned long *inodes, |
| 1961 | int *policy, nodemask_t *policy_nodes) | 2042 | int *policy, nodemask_t *policy_nodes) |
| @@ -2128,6 +2209,7 @@ static int shmem_fill_super(struct super_block *sb, | |||
| 2128 | &inodes, &policy, &policy_nodes)) | 2209 | &inodes, &policy, &policy_nodes)) |
| 2129 | return -EINVAL; | 2210 | return -EINVAL; |
| 2130 | } | 2211 | } |
| 2212 | sb->s_export_op = &shmem_export_ops; | ||
| 2131 | #else | 2213 | #else |
| 2132 | sb->s_flags |= MS_NOUSER; | 2214 | sb->s_flags |= MS_NOUSER; |
| 2133 | #endif | 2215 | #endif |
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c index c946bf468718..f5664c5b9eb1 100644 --- a/mm/shmem_acl.c +++ b/mm/shmem_acl.c | |||
| @@ -35,7 +35,7 @@ shmem_get_acl(struct inode *inode, int type) | |||
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | /** | 37 | /** |
| 38 | * shmem_get_acl - generic_acl_operations->setacl() operation | 38 | * shmem_set_acl - generic_acl_operations->setacl() operation |
| 39 | */ | 39 | */ |
| 40 | static void | 40 | static void |
| 41 | shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) | 41 | shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) |
| @@ -883,7 +883,7 @@ static void init_reap_node(int cpu) | |||
| 883 | if (node == MAX_NUMNODES) | 883 | if (node == MAX_NUMNODES) |
| 884 | node = first_node(node_online_map); | 884 | node = first_node(node_online_map); |
| 885 | 885 | ||
| 886 | __get_cpu_var(reap_node) = node; | 886 | per_cpu(reap_node, cpu) = node; |
| 887 | } | 887 | } |
| 888 | 888 | ||
| 889 | static void next_reap_node(void) | 889 | static void next_reap_node(void) |
| @@ -3152,12 +3152,15 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) | |||
| 3152 | struct zone **z; | 3152 | struct zone **z; |
| 3153 | void *obj = NULL; | 3153 | void *obj = NULL; |
| 3154 | 3154 | ||
| 3155 | for (z = zonelist->zones; *z && !obj; z++) | 3155 | for (z = zonelist->zones; *z && !obj; z++) { |
| 3156 | int nid = zone_to_nid(*z); | ||
| 3157 | |||
| 3156 | if (zone_idx(*z) <= ZONE_NORMAL && | 3158 | if (zone_idx(*z) <= ZONE_NORMAL && |
| 3157 | cpuset_zone_allowed(*z, flags)) | 3159 | cpuset_zone_allowed(*z, flags) && |
| 3160 | cache->nodelists[nid]) | ||
| 3158 | obj = __cache_alloc_node(cache, | 3161 | obj = __cache_alloc_node(cache, |
| 3159 | flags | __GFP_THISNODE, | 3162 | flags | __GFP_THISNODE, nid); |
| 3160 | zone_to_nid(*z)); | 3163 | } |
| 3161 | return obj; | 3164 | return obj; |
| 3162 | } | 3165 | } |
| 3163 | 3166 | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 86c52ab80878..b3c82ba30012 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
| @@ -211,7 +211,7 @@ static struct page *__kmalloc_section_memmap(unsigned long nr_pages) | |||
| 211 | struct page *page, *ret; | 211 | struct page *page, *ret; |
| 212 | unsigned long memmap_size = sizeof(struct page) * nr_pages; | 212 | unsigned long memmap_size = sizeof(struct page) * nr_pages; |
| 213 | 213 | ||
| 214 | page = alloc_pages(GFP_KERNEL, get_order(memmap_size)); | 214 | page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size)); |
| 215 | if (page) | 215 | if (page) |
| 216 | goto got_map_page; | 216 | goto got_map_page; |
| 217 | 217 | ||
diff --git a/mm/truncate.c b/mm/truncate.c index f4edbc179d14..e07b1e682c38 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -96,7 +96,6 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) | |||
| 96 | return 0; | 96 | return 0; |
| 97 | 97 | ||
| 98 | ret = remove_mapping(mapping, page); | 98 | ret = remove_mapping(mapping, page); |
| 99 | ClearPageUptodate(page); | ||
| 100 | 99 | ||
| 101 | return ret; | 100 | return ret; |
| 102 | } | 101 | } |
| @@ -302,7 +301,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
| 302 | if (page->mapping != mapping) | 301 | if (page->mapping != mapping) |
| 303 | return 0; | 302 | return 0; |
| 304 | 303 | ||
| 305 | if (PagePrivate(page) && !try_to_release_page(page, 0)) | 304 | if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) |
| 306 | return 0; | 305 | return 0; |
| 307 | 306 | ||
| 308 | write_lock_irq(&mapping->tree_lock); | 307 | write_lock_irq(&mapping->tree_lock); |
| @@ -396,6 +395,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, | |||
| 396 | pagevec_release(&pvec); | 395 | pagevec_release(&pvec); |
| 397 | cond_resched(); | 396 | cond_resched(); |
| 398 | } | 397 | } |
| 398 | WARN_ON_ONCE(ret); | ||
| 399 | return ret; | 399 | return ret; |
| 400 | } | 400 | } |
| 401 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); | 401 | EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 750ab6ed13fc..86897ee792d6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
| @@ -160,13 +160,15 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | |||
| 160 | return err; | 160 | return err; |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, | 163 | static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, |
| 164 | unsigned long start, unsigned long end, int node) | 164 | unsigned long start, unsigned long end, |
| 165 | int node, gfp_t gfp_mask) | ||
| 165 | { | 166 | { |
| 166 | struct vm_struct **p, *tmp, *area; | 167 | struct vm_struct **p, *tmp, *area; |
| 167 | unsigned long align = 1; | 168 | unsigned long align = 1; |
| 168 | unsigned long addr; | 169 | unsigned long addr; |
| 169 | 170 | ||
| 171 | BUG_ON(in_interrupt()); | ||
| 170 | if (flags & VM_IOREMAP) { | 172 | if (flags & VM_IOREMAP) { |
| 171 | int bit = fls(size); | 173 | int bit = fls(size); |
| 172 | 174 | ||
| @@ -179,16 +181,13 @@ struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, | |||
| 179 | } | 181 | } |
| 180 | addr = ALIGN(start, align); | 182 | addr = ALIGN(start, align); |
| 181 | size = PAGE_ALIGN(size); | 183 | size = PAGE_ALIGN(size); |
| 184 | if (unlikely(!size)) | ||
| 185 | return NULL; | ||
| 182 | 186 | ||
| 183 | area = kmalloc_node(sizeof(*area), GFP_KERNEL, node); | 187 | area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node); |
| 184 | if (unlikely(!area)) | 188 | if (unlikely(!area)) |
| 185 | return NULL; | 189 | return NULL; |
| 186 | 190 | ||
| 187 | if (unlikely(!size)) { | ||
| 188 | kfree (area); | ||
| 189 | return NULL; | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | 191 | /* |
| 193 | * We always allocate a guard page. | 192 | * We always allocate a guard page. |
| 194 | */ | 193 | */ |
| @@ -236,7 +235,7 @@ out: | |||
| 236 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | 235 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, |
| 237 | unsigned long start, unsigned long end) | 236 | unsigned long start, unsigned long end) |
| 238 | { | 237 | { |
| 239 | return __get_vm_area_node(size, flags, start, end, -1); | 238 | return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL); |
| 240 | } | 239 | } |
| 241 | 240 | ||
| 242 | /** | 241 | /** |
| @@ -253,9 +252,11 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) | |||
| 253 | return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END); | 252 | return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END); |
| 254 | } | 253 | } |
| 255 | 254 | ||
| 256 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node) | 255 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, |
| 256 | int node, gfp_t gfp_mask) | ||
| 257 | { | 257 | { |
| 258 | return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node); | 258 | return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node, |
| 259 | gfp_mask); | ||
| 259 | } | 260 | } |
| 260 | 261 | ||
| 261 | /* Caller must hold vmlist_lock */ | 262 | /* Caller must hold vmlist_lock */ |
| @@ -428,8 +429,11 @@ void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
| 428 | if (array_size > PAGE_SIZE) { | 429 | if (array_size > PAGE_SIZE) { |
| 429 | pages = __vmalloc_node(array_size, gfp_mask, PAGE_KERNEL, node); | 430 | pages = __vmalloc_node(array_size, gfp_mask, PAGE_KERNEL, node); |
| 430 | area->flags |= VM_VPAGES; | 431 | area->flags |= VM_VPAGES; |
| 431 | } else | 432 | } else { |
| 432 | pages = kmalloc_node(array_size, (gfp_mask & ~__GFP_HIGHMEM), node); | 433 | pages = kmalloc_node(array_size, |
| 434 | (gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO)), | ||
| 435 | node); | ||
| 436 | } | ||
| 433 | area->pages = pages; | 437 | area->pages = pages; |
| 434 | if (!area->pages) { | 438 | if (!area->pages) { |
| 435 | remove_vm_area(area->addr); | 439 | remove_vm_area(area->addr); |
| @@ -484,7 +488,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | |||
| 484 | if (!size || (size >> PAGE_SHIFT) > num_physpages) | 488 | if (!size || (size >> PAGE_SHIFT) > num_physpages) |
| 485 | return NULL; | 489 | return NULL; |
| 486 | 490 | ||
| 487 | area = get_vm_area_node(size, VM_ALLOC, node); | 491 | area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask); |
| 488 | if (!area) | 492 | if (!area) |
| 489 | return NULL; | 493 | return NULL; |
| 490 | 494 | ||
| @@ -525,11 +529,12 @@ void *vmalloc_user(unsigned long size) | |||
| 525 | void *ret; | 529 | void *ret; |
| 526 | 530 | ||
| 527 | ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); | 531 | ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); |
| 528 | write_lock(&vmlist_lock); | 532 | if (ret) { |
| 529 | area = __find_vm_area(ret); | 533 | write_lock(&vmlist_lock); |
| 530 | area->flags |= VM_USERMAP; | 534 | area = __find_vm_area(ret); |
| 531 | write_unlock(&vmlist_lock); | 535 | area->flags |= VM_USERMAP; |
| 532 | 536 | write_unlock(&vmlist_lock); | |
| 537 | } | ||
| 533 | return ret; | 538 | return ret; |
| 534 | } | 539 | } |
| 535 | EXPORT_SYMBOL(vmalloc_user); | 540 | EXPORT_SYMBOL(vmalloc_user); |
| @@ -598,11 +603,12 @@ void *vmalloc_32_user(unsigned long size) | |||
| 598 | void *ret; | 603 | void *ret; |
| 599 | 604 | ||
| 600 | ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); | 605 | ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); |
| 601 | write_lock(&vmlist_lock); | 606 | if (ret) { |
| 602 | area = __find_vm_area(ret); | 607 | write_lock(&vmlist_lock); |
| 603 | area->flags |= VM_USERMAP; | 608 | area = __find_vm_area(ret); |
| 604 | write_unlock(&vmlist_lock); | 609 | area->flags |= VM_USERMAP; |
| 605 | 610 | write_unlock(&vmlist_lock); | |
| 611 | } | ||
| 606 | return ret; | 612 | return ret; |
| 607 | } | 613 | } |
| 608 | EXPORT_SYMBOL(vmalloc_32_user); | 614 | EXPORT_SYMBOL(vmalloc_32_user); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index eca70310adb2..518540a4a2a6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -378,6 +378,12 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) | |||
| 378 | return PAGE_CLEAN; | 378 | return PAGE_CLEAN; |
| 379 | } | 379 | } |
| 380 | 380 | ||
| 381 | /* | ||
| 382 | * Attempt to detach a locked page from its ->mapping. If it is dirty or if | ||
| 383 | * someone else has a ref on the page, abort and return 0. If it was | ||
| 384 | * successfully detached, return 1. Assumes the caller has a single ref on | ||
| 385 | * this page. | ||
| 386 | */ | ||
| 381 | int remove_mapping(struct address_space *mapping, struct page *page) | 387 | int remove_mapping(struct address_space *mapping, struct page *page) |
| 382 | { | 388 | { |
| 383 | BUG_ON(!PageLocked(page)); | 389 | BUG_ON(!PageLocked(page)); |
| @@ -717,6 +723,20 @@ done: | |||
| 717 | return nr_reclaimed; | 723 | return nr_reclaimed; |
| 718 | } | 724 | } |
| 719 | 725 | ||
| 726 | /* | ||
| 727 | * We are about to scan this zone at a certain priority level. If that priority | ||
| 728 | * level is smaller (ie: more urgent) than the previous priority, then note | ||
| 729 | * that priority level within the zone. This is done so that when the next | ||
| 730 | * process comes in to scan this zone, it will immediately start out at this | ||
| 731 | * priority level rather than having to build up its own scanning priority. | ||
| 732 | * Here, this priority affects only the reclaim-mapped threshold. | ||
| 733 | */ | ||
| 734 | static inline void note_zone_scanning_priority(struct zone *zone, int priority) | ||
| 735 | { | ||
| 736 | if (priority < zone->prev_priority) | ||
| 737 | zone->prev_priority = priority; | ||
| 738 | } | ||
| 739 | |||
| 720 | static inline int zone_is_near_oom(struct zone *zone) | 740 | static inline int zone_is_near_oom(struct zone *zone) |
| 721 | { | 741 | { |
| 722 | return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; | 742 | return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; |
| @@ -740,7 +760,7 @@ static inline int zone_is_near_oom(struct zone *zone) | |||
| 740 | * But we had to alter page->flags anyway. | 760 | * But we had to alter page->flags anyway. |
| 741 | */ | 761 | */ |
| 742 | static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | 762 | static void shrink_active_list(unsigned long nr_pages, struct zone *zone, |
| 743 | struct scan_control *sc) | 763 | struct scan_control *sc, int priority) |
| 744 | { | 764 | { |
| 745 | unsigned long pgmoved; | 765 | unsigned long pgmoved; |
| 746 | int pgdeactivate = 0; | 766 | int pgdeactivate = 0; |
| @@ -764,7 +784,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 764 | * `distress' is a measure of how much trouble we're having | 784 | * `distress' is a measure of how much trouble we're having |
| 765 | * reclaiming pages. 0 -> no problems. 100 -> great trouble. | 785 | * reclaiming pages. 0 -> no problems. 100 -> great trouble. |
| 766 | */ | 786 | */ |
| 767 | distress = 100 >> zone->prev_priority; | 787 | distress = 100 >> min(zone->prev_priority, priority); |
| 768 | 788 | ||
| 769 | /* | 789 | /* |
| 770 | * The point of this algorithm is to decide when to start | 790 | * The point of this algorithm is to decide when to start |
| @@ -916,7 +936,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, | |||
| 916 | nr_to_scan = min(nr_active, | 936 | nr_to_scan = min(nr_active, |
| 917 | (unsigned long)sc->swap_cluster_max); | 937 | (unsigned long)sc->swap_cluster_max); |
| 918 | nr_active -= nr_to_scan; | 938 | nr_active -= nr_to_scan; |
| 919 | shrink_active_list(nr_to_scan, zone, sc); | 939 | shrink_active_list(nr_to_scan, zone, sc, priority); |
| 920 | } | 940 | } |
| 921 | 941 | ||
| 922 | if (nr_inactive) { | 942 | if (nr_inactive) { |
| @@ -966,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, | |||
| 966 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 986 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
| 967 | continue; | 987 | continue; |
| 968 | 988 | ||
| 969 | zone->temp_priority = priority; | 989 | note_zone_scanning_priority(zone, priority); |
| 970 | if (zone->prev_priority > priority) | ||
| 971 | zone->prev_priority = priority; | ||
| 972 | 990 | ||
| 973 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 991 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
| 974 | continue; /* Let kswapd poll it */ | 992 | continue; /* Let kswapd poll it */ |
| @@ -1018,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
| 1018 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 1036 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
| 1019 | continue; | 1037 | continue; |
| 1020 | 1038 | ||
| 1021 | zone->temp_priority = DEF_PRIORITY; | ||
| 1022 | lru_pages += zone->nr_active + zone->nr_inactive; | 1039 | lru_pages += zone->nr_active + zone->nr_inactive; |
| 1023 | } | 1040 | } |
| 1024 | 1041 | ||
| @@ -1053,19 +1070,28 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
| 1053 | 1070 | ||
| 1054 | /* Take a nap, wait for some writeback to complete */ | 1071 | /* Take a nap, wait for some writeback to complete */ |
| 1055 | if (sc.nr_scanned && priority < DEF_PRIORITY - 2) | 1072 | if (sc.nr_scanned && priority < DEF_PRIORITY - 2) |
| 1056 | blk_congestion_wait(WRITE, HZ/10); | 1073 | congestion_wait(WRITE, HZ/10); |
| 1057 | } | 1074 | } |
| 1058 | /* top priority shrink_caches still had more to do? don't OOM, then */ | 1075 | /* top priority shrink_caches still had more to do? don't OOM, then */ |
| 1059 | if (!sc.all_unreclaimable) | 1076 | if (!sc.all_unreclaimable) |
| 1060 | ret = 1; | 1077 | ret = 1; |
| 1061 | out: | 1078 | out: |
| 1079 | /* | ||
| 1080 | * Now that we've scanned all the zones at this priority level, note | ||
| 1081 | * that level within the zone so that the next thread which performs | ||
| 1082 | * scanning of this zone will immediately start out at this priority | ||
| 1083 | * level. This affects only the decision whether or not to bring | ||
| 1084 | * mapped pages onto the inactive list. | ||
| 1085 | */ | ||
| 1086 | if (priority < 0) | ||
| 1087 | priority = 0; | ||
| 1062 | for (i = 0; zones[i] != 0; i++) { | 1088 | for (i = 0; zones[i] != 0; i++) { |
| 1063 | struct zone *zone = zones[i]; | 1089 | struct zone *zone = zones[i]; |
| 1064 | 1090 | ||
| 1065 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 1091 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
| 1066 | continue; | 1092 | continue; |
| 1067 | 1093 | ||
| 1068 | zone->prev_priority = zone->temp_priority; | 1094 | zone->prev_priority = priority; |
| 1069 | } | 1095 | } |
| 1070 | return ret; | 1096 | return ret; |
| 1071 | } | 1097 | } |
| @@ -1105,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) | |||
| 1105 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1131 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
| 1106 | .swappiness = vm_swappiness, | 1132 | .swappiness = vm_swappiness, |
| 1107 | }; | 1133 | }; |
| 1134 | /* | ||
| 1135 | * temp_priority is used to remember the scanning priority at which | ||
| 1136 | * this zone was successfully refilled to free_pages == pages_high. | ||
| 1137 | */ | ||
| 1138 | int temp_priority[MAX_NR_ZONES]; | ||
| 1108 | 1139 | ||
| 1109 | loop_again: | 1140 | loop_again: |
| 1110 | total_scanned = 0; | 1141 | total_scanned = 0; |
| @@ -1112,11 +1143,8 @@ loop_again: | |||
| 1112 | sc.may_writepage = !laptop_mode; | 1143 | sc.may_writepage = !laptop_mode; |
| 1113 | count_vm_event(PAGEOUTRUN); | 1144 | count_vm_event(PAGEOUTRUN); |
| 1114 | 1145 | ||
| 1115 | for (i = 0; i < pgdat->nr_zones; i++) { | 1146 | for (i = 0; i < pgdat->nr_zones; i++) |
| 1116 | struct zone *zone = pgdat->node_zones + i; | 1147 | temp_priority[i] = DEF_PRIORITY; |
| 1117 | |||
| 1118 | zone->temp_priority = DEF_PRIORITY; | ||
| 1119 | } | ||
| 1120 | 1148 | ||
| 1121 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 1149 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { |
| 1122 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 1150 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
| @@ -1177,10 +1205,9 @@ scan: | |||
| 1177 | if (!zone_watermark_ok(zone, order, zone->pages_high, | 1205 | if (!zone_watermark_ok(zone, order, zone->pages_high, |
| 1178 | end_zone, 0)) | 1206 | end_zone, 0)) |
| 1179 | all_zones_ok = 0; | 1207 | all_zones_ok = 0; |
| 1180 | zone->temp_priority = priority; | 1208 | temp_priority[i] = priority; |
| 1181 | if (zone->prev_priority > priority) | ||
| 1182 | zone->prev_priority = priority; | ||
| 1183 | sc.nr_scanned = 0; | 1209 | sc.nr_scanned = 0; |
| 1210 | note_zone_scanning_priority(zone, priority); | ||
| 1184 | nr_reclaimed += shrink_zone(priority, zone, &sc); | 1211 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
| 1185 | reclaim_state->reclaimed_slab = 0; | 1212 | reclaim_state->reclaimed_slab = 0; |
| 1186 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, | 1213 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, |
| @@ -1208,7 +1235,7 @@ scan: | |||
| 1208 | * another pass across the zones. | 1235 | * another pass across the zones. |
| 1209 | */ | 1236 | */ |
| 1210 | if (total_scanned && priority < DEF_PRIORITY - 2) | 1237 | if (total_scanned && priority < DEF_PRIORITY - 2) |
| 1211 | blk_congestion_wait(WRITE, HZ/10); | 1238 | congestion_wait(WRITE, HZ/10); |
| 1212 | 1239 | ||
| 1213 | /* | 1240 | /* |
| 1214 | * We do this so kswapd doesn't build up large priorities for | 1241 | * We do this so kswapd doesn't build up large priorities for |
| @@ -1220,10 +1247,15 @@ scan: | |||
| 1220 | break; | 1247 | break; |
| 1221 | } | 1248 | } |
| 1222 | out: | 1249 | out: |
| 1250 | /* | ||
| 1251 | * Note within each zone the priority level at which this zone was | ||
| 1252 | * brought into a happy state. So that the next thread which scans this | ||
| 1253 | * zone will start out at that priority level. | ||
| 1254 | */ | ||
| 1223 | for (i = 0; i < pgdat->nr_zones; i++) { | 1255 | for (i = 0; i < pgdat->nr_zones; i++) { |
| 1224 | struct zone *zone = pgdat->node_zones + i; | 1256 | struct zone *zone = pgdat->node_zones + i; |
| 1225 | 1257 | ||
| 1226 | zone->prev_priority = zone->temp_priority; | 1258 | zone->prev_priority = temp_priority[i]; |
| 1227 | } | 1259 | } |
| 1228 | if (!all_zones_ok) { | 1260 | if (!all_zones_ok) { |
| 1229 | cond_resched(); | 1261 | cond_resched(); |
| @@ -1352,7 +1384,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int pass, | |||
| 1352 | if (zone->nr_scan_active >= nr_pages || pass > 3) { | 1384 | if (zone->nr_scan_active >= nr_pages || pass > 3) { |
| 1353 | zone->nr_scan_active = 0; | 1385 | zone->nr_scan_active = 0; |
| 1354 | nr_to_scan = min(nr_pages, zone->nr_active); | 1386 | nr_to_scan = min(nr_pages, zone->nr_active); |
| 1355 | shrink_active_list(nr_to_scan, zone, sc); | 1387 | shrink_active_list(nr_to_scan, zone, sc, prio); |
| 1356 | } | 1388 | } |
| 1357 | } | 1389 | } |
| 1358 | 1390 | ||
| @@ -1452,7 +1484,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
| 1452 | goto out; | 1484 | goto out; |
| 1453 | 1485 | ||
| 1454 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) | 1486 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) |
| 1455 | blk_congestion_wait(WRITE, HZ / 10); | 1487 | congestion_wait(WRITE, HZ / 10); |
| 1456 | } | 1488 | } |
| 1457 | 1489 | ||
| 1458 | lru_pages = 0; | 1490 | lru_pages = 0; |
| @@ -1608,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 1608 | */ | 1640 | */ |
| 1609 | priority = ZONE_RECLAIM_PRIORITY; | 1641 | priority = ZONE_RECLAIM_PRIORITY; |
| 1610 | do { | 1642 | do { |
| 1643 | note_zone_scanning_priority(zone, priority); | ||
| 1611 | nr_reclaimed += shrink_zone(priority, zone, &sc); | 1644 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
| 1612 | priority--; | 1645 | priority--; |
| 1613 | } while (priority >= 0 && nr_reclaimed < nr_pages); | 1646 | } while (priority >= 0 && nr_reclaimed < nr_pages); |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 45b124e012f5..8614e8f6743b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
| @@ -587,11 +587,9 @@ static int zoneinfo_show(struct seq_file *m, void *arg) | |||
| 587 | seq_printf(m, | 587 | seq_printf(m, |
| 588 | "\n all_unreclaimable: %u" | 588 | "\n all_unreclaimable: %u" |
| 589 | "\n prev_priority: %i" | 589 | "\n prev_priority: %i" |
| 590 | "\n temp_priority: %i" | ||
| 591 | "\n start_pfn: %lu", | 590 | "\n start_pfn: %lu", |
| 592 | zone->all_unreclaimable, | 591 | zone->all_unreclaimable, |
| 593 | zone->prev_priority, | 592 | zone->prev_priority, |
| 594 | zone->temp_priority, | ||
| 595 | zone->zone_start_pfn); | 593 | zone->zone_start_pfn); |
| 596 | spin_unlock_irqrestore(&zone->lock, flags); | 594 | spin_unlock_irqrestore(&zone->lock, flags); |
| 597 | seq_putc(m, '\n'); | 595 | seq_putc(m, '\n'); |
